/* WMN 
 * Most sequence functions
*/

#include <gtk/gtk.h>
#include <stdio.h>
#include <glib.h>
#include <math.h>
#include <ctype.h>
#include "fpp.h"
#include "tree.h"
#include "seq.h"
#include "bss.h"
#include "gtktrack.h"
//#include "newbss.h"

typedef struct _aligndata
{
	int fpc_ctg;
	char seq_name[100];
	int ctg_start,ctg_end,seq_start,seq_end;
	char orient;
	char spans_ctg;
	char spans_seq;
} AlignData;

extern void lowerstring(char *search);
extern void show_help(char*,char*);
extern FILE *graphQueryOpen();

void dump_seq_tree();
void traverse_seq_tree();
void add_sequence_hit(FPC_SEQ* pseq, int clone_idx, char rf,int,int,seq_hit_type);
void spooky2(int box, float x, float y);
void falsepick(int box, float x, float y);
void seq_set_min_clones(char* num);
void seq_set_min_ctgs(char* num);
void seq_set_window_size(char* num);
void fpc_load_seqinfo(char* line);
void fpc_load_bssinfo(char* line);
void scan_seq_props();
void sequence_window();
int is_whitespace(char* str);
void clear_listroot();
void load_besmap_file(char* path);
extern void load_layout();
extern void add_to_projmsg(int k, char* msg);
extern void  Zproj_results();
extern void addfpremark(char *junk,int index);
void clear_fprem_pattern(char* pat);

static char window_size_sz[256];
static char min_clones_sz[256];
static char min_ctgs_sz[256];
static char top_n_sz[256];
static char search_name_sz[256];
static char ctg_fromend_sz[256];
static char seq_fromend_sz[256];
static char ctg_minspan_sz[256];
static char seq_minspan_sz[256];
static int uniqueArc, placedArc, multiplacedArc, endsArc, endmatchArc;
static int expand_ctgs;
static int expand_seqctgs;
static int expand_place;

static int unaligned_minclones = 10;
static int unaligned_minseqlen = 200; // in kb
static char unaligned_minclones_sz[10];
static char unaligned_minseqlen_sz[10];

extern struct layout_ctg_info *ctg_layout;
extern GtkWidget *ctg_window;
extern int lexstrcmp_fpc();

#define GETCURDIR (strcmp(dirName,"") ? dirName : ".")

FPC_TREE seq_tree;

int gSEQ;
int seq_min_clones;

extern int max_seqsz;
FPC_SEQ* highseq;
FPC_SEQ* centreSequence;

void load_draft_bss_file(char* path);
void load_info_file(char* path);
void load_draft_bss_file_cb();
void load_info_file_cb();

struct seq_props SeqProps;
struct file_list *seq_info_files;

typedef enum {CTG,EXPCTGS,EXPSEQCTGS, EXPPLACE} seqbox_types;
struct seqdisplist
{
    int box;
    seqbox_types type;
    int ctg;
    int pos;
    FPC_SEQ* seq;
    struct seqdisplist* next;
};
struct seqdisplist* sdlist;



void fpc_load_info_file(char* line)
{
    char path[MAXPATHLEN]; 
    char type[20];   
    assert(line);
    
    if (2 == sscanf(line,"%s %s",type,path))
    {
        if (!strcmp(type,"SEQ_INFO"))
        {
            load_info_file(path);   
        }
        else if (!strcmp(type,"BES_MAP"))
        {
            load_besmap_file(path); 
        }
        else if (!strcmp(type,"SEQ_BSS"))
        {
            load_draft_bss_file(path);
        }

                            
    }   
    else
    {
        printf("Bad line %s\n",line);fflush(0);
    }
}

void add_sequence_hit(FPC_SEQ* pseq, int clone_idx, char rf, int sstart, int send, seq_hit_type ht)
{
    SEQHIT* newhit;
    CLONE* clp;

    assert(pseq);
    clp = arrp(acedata,clone_idx,CLONE);
    assert(clp);

    /* Create the hit */
    newhit = (SEQHIT*)malloc(sizeof(SEQHIT));
    memset(newhit,0,sizeof(SEQHIT));
    newhit->clone_idx = clone_idx;
    newhit->seq = pseq;
    newhit->rf = rf;
    newhit->seq_start = sstart;
    newhit->seq_end = send;
    newhit->type = ht;

    /* Add the hit to the sequence object */
       
    newhit->next = pseq->hits;
    pseq->hits = newhit;

    /* Duplicate the hit and add to the clone object */

    newhit = (SEQHIT*)malloc(sizeof(SEQHIT));
    memcpy(newhit,pseq->hits,sizeof(SEQHIT));

    newhit->next = clp->seq_hits;
    clp->seq_hits = newhit;
}

void remove_seq_hit_from_clone(int idx, FPC_SEQ* pseq)
{
    SEQHIT *phit, *pprev, *pcur;
    CLONE* clp;

    assert(pseq);
    clp = arrp(acedata,idx,CLONE);
    assert(clp);

    phit = clp->seq_hits;
    pprev = 0;
    while (phit)
    {
        pcur = phit;
        phit = phit->next;
        if (pcur->seq == pseq)
        {
            free(pcur);
            if (pprev)
            {
                pprev->next = phit;
            } 
            else
            {
                clp->seq_hits = phit;
            }                               
        }
        else
        {
            pprev = pcur;
        }
    }

}

void remove_sequence(char* seq)
{
    FPC_SEQ* pseq;
    SEQHIT* phit;
    pseq = find_sequence(seq);
    if (pseq)
    {
        /* get rid of the hits attached to clones */
        phit = pseq->hits;
        while (phit)
        {
            remove_seq_hit_from_clone(phit->clone_idx,pseq);
        }   
        fpc_tree_remove_data(&seq_tree,seq,free_seq_data);
    }
    else
    {
        printf("Failed to find %s\n",seq);fflush(0);
    }
}
void load_besmap_file_cb()
{
    FILE *fp;

    char path[MAXPATHLEN+1];
    char file[MAXPATHLEN+1];


    file[0] = 0;
    strcpy(path, GETCURDIR);

    if((fp = (FILE *)graphQueryOpen(path, file, "","r",
                                  "Load info file")) == NULL)
    {
        return;
    }
    fclose(fp);

    sprintf(path,"%s/%s",path,file);
    load_besmap_file(path);    
}
void load_info_file_cb()
{
    FILE *fp;

    char path[MAXPATHLEN+1];
    char file[MAXPATHLEN+1];


    file[0] = 0;
    strcpy(path, GETCURDIR);

    if((fp = (FILE *)graphQueryOpen(path, file, "","r",
                                  "Load info file")) == NULL)
    {
        return;
    }
    fclose(fp);

    sprintf(path,"%s/%s",path,file);
    load_info_file(path);    
}
void add_to_file_list( struct file_list* pfiles)
{
    struct file_list *tpf;
    if (seq_info_files)
    {
        // let's not add it if it's already there
        for (tpf = seq_info_files; tpf->next; tpf = tpf->next)
        {
            if (!strcmp(tpf->path,pfiles->path))
            {
                 return;
            }
        }

        // we have to insert this at the end in case order matters in the loading
        // find the end
        for (tpf = seq_info_files; tpf->next; tpf = tpf->next);

        assert(!tpf->next);
        tpf->next = pfiles;
    }
    else
    {
        seq_info_files = pfiles;
    }
}
void add_seq_track()
{
    int i;
    struct layout_track_info **pt, *t;
    struct layout_filter_info *f;
    int found;

    if (!fpc_tree_has_content(&seq_tree)) return;
    if(ctg_layout==NULL) load_layout();
    
    for (i = 0; i <= max_contig; i++)
    {
        found = 0;
        // use the double pointer just in case there are no track entries at all
        for (pt = &ctg_layout[i].tracks; *pt; pt = &((*pt)->next))
        {
            if ((*pt)->entity == SEQUENCE) found=1;
        }
        if (!found)
        {
            *pt = (struct layout_track_info*)malloc(sizeof(struct layout_track_info));
            t = *pt;
            memset(t,0,sizeof(struct layout_track_info));

            t->entity = SEQUENCE;
            t->row_policy = AUTO_POLICY;
            t->numrows = 0;
            t->show_anchor_bins = 0;
            t->show_anchor_pos = 0;
            t->show_anchor_lines = 0;
            t->height = 70;  
            t->filters = malloc(sizeof(struct layout_filter_info));
              f=t->filters;
              strcpy(f->name, "*");
              strcpy(f->remark, "");
              f->type = 0;
              f->status = 0;
              f->chrom = 0;
              f->attachment = 0;
              f->color_choice = BLACK_COLOR;
              f->next = NULL;            
        }
    }
}

/* Load the file detailing the supercontig/seqctg lengths and relative positions */
void load_info_file(char* path)
{
    FILE *readfile = NULL;

    char line[1024];
    char* p;
    char seqname[1024];
    int seqlen;
    int pos;
    int nsuper, nsub;
    struct file_list* pfiles;
    FPC_SEQ *pseq, *pseq2;
    FPC_SEQ_LIST* plist_node, *pprev;


    printf("load sequence info file:%s\n",path);fflush(0);
    p = 0;
    readfile=fopen(path,"r");
    if (!readfile)
    {
        printf("Unable to open %s\n",path);fflush(0);
        p = strrchr(path,'/');
        if (p)
        {
            p++;
            printf("Trying ./%s\n",p);fflush(0);
            readfile = fopen(p,"r");
            if (!readfile)
            {
                printf("Failed\n");fflush(0);
                return;
            }
        }
    }

    nsuper = 0;
    nsub = 0;

    while  (fgets(line,1024,readfile))
    {
        if (is_whitespace(line))
        {
            continue;
        }
        if (line[0] == '>')
        {
            if (2 == sscanf(line,">%s %d",seqname,&seqlen))
            {
                pseq = find_sequence(seqname);
                if (!pseq)
                {
                    //printf("Add supercontig %s length %ld\n",seqname,seqlen);fflush(0);
                    pseq = (FPC_SEQ*)malloc(sizeof(FPC_SEQ));
                    memset(pseq,0,sizeof(FPC_SEQ));
                    pseq->length = seqlen;
                    pseq->type = ST_DRAFT;
                    pseq->name = strdup(seqname);
                    insert_sequence(pseq);
                    nsuper++;
                }
                else
                {
                    printf("Duplicate supercontig name %s\n",seqname);fflush(0);
                }
            }
            else
            {
                printf("Bad format:%s",line);fflush(0);
            }                
        }
        else
        {
            if (3 == sscanf(line,"%s %d %d",seqname,&pos,&seqlen))
            {
                pseq2 = find_sequence(seqname);
                if (!pseq2)
                {
                    //printf("Add seqcontig %s length %ld\n",seqname,seqlen);fflush(0);
                    pseq2 = (FPC_SEQ*)malloc(sizeof(FPC_SEQ));
                    memset(pseq2,0,sizeof(FPC_SEQ));
                    pseq2->length = seqlen;
                    pseq2->type = ST_DRAFT;
                    pseq2->name = strdup(seqname);
                    pseq2->pos = pos;
                    pseq2->parent = pseq;
                    insert_sequence(pseq2);
                    nsub++;
                }
                else
                {
                    printf("Duplicate seqcontig name %s\n",seqname);fflush(0);
                }

                plist_node = (FPC_SEQ_LIST*)malloc(sizeof(FPC_SEQ_LIST));
                memset(plist_node,0,sizeof(FPC_SEQ_LIST));
                plist_node->seq = pseq2;
                // add it to the seqctg list of its parent in order of pos
                if (pseq->ctgs)
                {
                    pprev = pseq->ctgs;
                    if (pprev->seq->pos < pos)
                    {
                        for (pprev = pseq->ctgs; pprev && pprev->next && 
                                        pprev->next->seq->pos < pos; pprev = pprev->next);

                         plist_node->next = pprev->next;
                         pprev->next = plist_node;
                    }
                    else
                    {
                        plist_node->next = pprev;
                        pseq->ctgs = plist_node;
                    }
                }
                else
                {
                    pseq->ctgs = plist_node;
                }

            }
            else
            {
                printf("Bad format:%s",line);fflush(0);
            }            
        }
    }      
    
    if (nsuper) printf("Loaded %d supercontigs, %d seqctgs\n",nsuper,nsub);fflush(0);    
    
    pfiles = (struct file_list*)malloc(sizeof(struct file_list));
    memset(pfiles,0,sizeof(struct file_list));

    if (p) strcpy(pfiles->path,p);
    else strcpy(pfiles->path,path);

    pfiles->next = 0;
    pfiles->n1 = nsuper;
    pfiles->n2 = nsub;
    pfiles->type = FT_SEQ;

    add_to_file_list(pfiles);
 
    add_seq_track();   
}

/* Load embedded BES map file. These are loaded as hits from their
    seqctg to their clone. */
void load_besmap_file(char* path)
{
    FILE *readfile = NULL;

    char line[1024];
    char* p;
    char seqname[1024];
    int pos;
    int nsuper, nsub;
    struct file_list* pfiles;
    char  *rfptr;
    FPC_SEQ *pseq;
    int cidx;
    int nbes=0, nplaced=0;
    int seqlen;

    printf("load besmap file:%s\n",path);fflush(0);
    p = 0;
    readfile=fopen(path,"r");
    if (!readfile)
    {
        printf("Unable to open %s\n",path);fflush(0);
        p = strrchr(path,'/');
        if (p)
        {
            p++;
            printf("Trying ./%s\n",p);fflush(0);
            readfile = fopen(p,"r");
            if (!readfile)
            {
                printf("Failed\n");fflush(0);
                return;
            }
        }
    }

    while  (fgets(line,1024,readfile))
    {
        if (line[0] == '>')
        {
            if (1 == sscanf(line,">%s",seqname))
            {
                pseq = find_sequence(seqname);
                if (!pseq)
                {
                    printf("Ignoring unknown draft sequence %s\n",seqname);fflush(0);
                }
            }
            else
            {
                printf("Bad format:%s",line);fflush(0);
            }                
        }
        else if (pseq)
        {
            if (3 == sscanf(line,"%s %d %d",seqname,&pos,&seqlen))
            {                
                rfptr = strrchr(seqname,'.');
                if (!rfptr)
                {
                    fprintf(stderr,"Badly formatted BES name %s (should be CloneName.ext) \n",seqname);fflush(0);
                    continue;
                }
                else
                {
                    *rfptr = 0;
                    rfptr++;
                    nbes++;
                    if (fppFind(acedata,seqname,&cidx,cloneOrder))
                    {
                        add_sequence_hit(pseq, cidx, rfptr[0],pos,pos+seqlen,HT_BES);
                        nplaced++;
                    }
                    else
                    {
                        //printf("Clone %s not found in FPC\n",seqname);fflush(0);
                    }
                }
            }
            else
            {
                printf("Bad format:%s",line);fflush(0);
            }            
        }
    }      
    
    printf("Loaded %d BES map entries, %d found in FPC\n",nbes,nplaced);fflush(0);
    
    pfiles = (struct file_list*)malloc(sizeof(struct file_list));
    memset(pfiles,0,sizeof(struct file_list));

    if (p) strcpy(pfiles->path,p);
    else strcpy(pfiles->path,path);

    pfiles->next = 0;
    pfiles->n1 = nsuper;
    pfiles->n2 = nsub;
    pfiles->type = FT_BES;

    add_to_file_list(pfiles);

    add_seq_track();
}
static void
load_draft_bss_file_another_cb(GtkWidget *widget, GtkFileSelection* selector) 
{
    gchar *gpath;
    char path[MAXPATHLEN+1];
    char path2[MAXPATHLEN+1];
    char* pc;
    FILE* fp;
    DIR* dp;
    gchar* m;
    int messresult;
    struct dirent* dp2;

    gpath = (gchar*)gtk_file_selection_get_filename(selector);
    strcpy(path,gpath);

    pc = strstr(path,"*.bss");
    if (pc)
    {
        *pc = 0;   
    } 

    if (strlen(path) > 1 && path[strlen(path) - 1] == '/')
    {
        path[strlen(path) - 1] = 0;
    }

    dp = opendir(path);
    if (dp)
    {
        /* load all the .bss files in the directory */
        m = g_strdup_printf("Load all BSS files in %s?",  path);
        messresult = messQuery(m);
        g_free(m);
        if (messresult != 0) 
        {
            while ( (dp2 = readdir(dp)) )
            {
                strcpy(path2,dp2->d_name);
                if (strlen(path2) > 4 && 0 == strcmp(&path2[strlen(path2)-4],".bss"))
                {
                    sprintf(path2,"%s/%s",path,dp2->d_name);
                    load_draft_bss_file(path2);
                }   
            }
        }
        closedir(dp);
    }
    else if ((fp = fopen(path,"r")))
    {
        /* single file */
        /* convert to a relative path if possible  WN why was I doing this? */
    
        fclose(fp);
        pc = strstr(path,"BSS_results");
        if (0) //pc)
        {
            if ( (fp = fopen(pc,"r")))
            {
                fclose(fp);
                load_draft_bss_file(pc);    
            }
        }
        else
        {
            load_draft_bss_file(path);    
        }
    }
    else
    {
        messout("Can't open %s",path);
    }
}
void load_draft_bss_file_cb()
{
    char path[MAXPATHLEN+1];
    DIR* dp;

    GtkWidget* selector;

    dp = opendir("BSS_results");
    if (dp)
    {
        sprintf(path,"BSS_results/*.bss");
        closedir(dp);
    }
    else
    {
        sprintf(path,"*.bss");
    }
    selector=gtk_file_selection_new("Please select a query file or directory");

    gtk_file_selection_set_filename(GTK_FILE_SELECTION(selector), path);
    gtk_file_selection_hide_fileop_buttons(GTK_FILE_SELECTION(selector));

    gtk_signal_connect(GTK_OBJECT (GTK_FILE_SELECTION(selector)->ok_button),
                     "clicked", GTK_SIGNAL_FUNC (load_draft_bss_file_another_cb), selector);
    gtk_signal_connect_object (GTK_OBJECT (GTK_FILE_SELECTION(selector)->ok_button),
   				     "clicked", GTK_SIGNAL_FUNC (gtk_widget_destroy),
   				     (gpointer) selector);
    gtk_signal_connect_object (GTK_OBJECT (GTK_FILE_SELECTION(selector)->cancel_button),
   				     "clicked", GTK_SIGNAL_FUNC (gtk_widget_destroy),
   				     (gpointer) selector);

    gtk_widget_show(selector);

    return;
}
void destroy_bss_hit(struct bss_hit* phit)
{
    int i;
    enum bss_datatype field;
    for (i = 0; i < bss_ncols; i++)
    {
        field = bss_column_order[i];
        switch (field)
        {
            case BSS_BES:
            case BSS_SEQCTG:
            case BSS_MARKER:
                if (phit->data[field].str) free(phit->data[field].str);
            break;
            default:
                assert(0);
            break;
        } 
    } 
    memset(phit,0,sizeof(struct bss_hit));     
}
void init_bss_hit(struct bss_hit* phit)
{
    int i;
    enum bss_datatype field;
    memset(phit,0,sizeof(struct bss_hit));
    for (i = 0; i < bss_ncols; i++)
    {
        field = bss_column_order[i];
        switch (field)
        {
            case BSS_QSTART:
            case BSS_QEND:
            case BSS_TSTART:
            case BSS_TEND:
            case BSS_TLEN:
            case BSS_SEQCTG:
            case BSS_SCORE:
                phit->data[field].i = atoi(bss_fields[i]);
            break;

            case BSS_BES:
            case BSS_MARKER:
            case BSS_RC:
            case BSS_CLONE:
                phit->data[field].str = strdup(bss_fields[i]);
            break;
            case BSS_IDENTITY:
                sscanf(bss_fields[i],"%d",&phit->data[field].i);
                break;
            default:
                //assert(0);
            break;
        } 
    }
}
int split_line(char* _line)
{
    int i = 0;
    char* ptr;
    char* line;

    line = strdup(_line);
    for (i = 0; i < 30; i++)
    {
        bss_fields[i][0] = 0;
    }
    ptr = strtok(_line," \t");
    for (i = 0; i < 30; i++)
    {
        if (ptr)
        {
            strncpy(bss_fields[i],ptr,19); /* WMN NOT SAFE */
        }
        else
        {
            break;
        }
        ptr = strtok(0," \t\n");
    }
    free(line);
    return i;
}

void init_bss_column_order(char* line)
{
    int i,j;
    int found;
    bss_ncols = 0;

    split_line(line);
    for (i = 0; i < BSS_NUMTYPES; i++)
    {
        bss_column_order[i] = BSS_NUMTYPES; 
        if (bss_fields[i][0] == 0)
        {
            break;
        } 
        found = 0;
        for (j = 0; j < BSS_NUMTYPES; j++)
        {
            if (0 == strcmp(bss_fields[i],bss_headers[j]))
            {
                bss_column_order[i] = j;
                found = 1;
                break;
            }   
        }
        if (!found)
        {
            printf("unknown column %s in bss file\n",bss_fields[i]);fflush(0);
        }
        bss_ncols++;
    }
}

int is_consistent_hit(int qlen, struct bss_hit* phit)
{
    int s1,s2,e1,e2,tlen;
    int trim = 10;
    int ret = 0;

    s1 = phit->data[BSS_QSTART].i;
    e1 = phit->data[BSS_QEND].i;
    s2 = phit->data[BSS_TSTART].i;
    e2 = phit->data[BSS_TEND].i;
    tlen = phit->data[BSS_TLEN].i;

    
    if (s1 <= trim && e1 >= qlen - trim)
    {
        // query totally aligned - ok
        ret = 1;
    }
    else if (s2 <= trim && e2 >= tlen - trim)
    {
        // target fully aligned
        ret = 1;
    }
    else
    {
        // each alignment needs to extend to one end
        if (  (s2 <= trim || e2 >= tlen - trim) 
            && (s1 <= trim || e1 >= qlen - trim))
        {
            ret = 1;
        } 
    }
    if (ret)
    {
        printf("accept:%d %d %d %d %d %d\n",s1,e1,qlen,s2,e2,tlen);fflush(0);
    }
    else
    {
        printf("reject:%d %d %d %d %d %d\n",s1,e1,qlen,s2,e2,tlen);fflush(0);
    }
    return ret;
}

void load_draft_bss_file(char* path)
{
    FPC_SEQ* pseq;
    int idx;
    char rf;
    int len;
    int count = 0;
    char* besname;
    char* clonename;
    int badclonecount = 0;
    int badseqcount = 0;
    struct file_list* pfiles;
    BSS_INST bsi;
    BSS_HIT* hit;
    int inconsistent = 0;
    int i;

    printf("load BSS file:%s\n",path);fflush(0);
    if (!read_bss_file(path,&bsi))
    {
        printf("Failed to load file\n");fflush(0);
        return;
    }

    // Load the hits. If the query name doesn't match something already 
    // loaded in an info file, we'll add it as a new supercontig.
    for (i = 0; i < arrayMax(bsi.bss_hits); i++)
    {        
        hit = arrp(bsi.bss_hits,i,BSS_HIT);

        pseq = find_sequence(hit->data[BSS_MARKER].str); 
        if (!pseq)
        {        
//            if (badseqcount <= 1000)
//            {            
//                printf("Found hit for new sequence %s\n",hit->data[BSS_MARKER].str);fflush(0);     
//            }
//            if (badseqcount == 1000)
//            {
//                printf("No more will be reported...\n");fflush(0);
//            }  
//            badseqcount++;

            pseq = (FPC_SEQ*)malloc(sizeof(FPC_SEQ));
            memset(pseq,0,sizeof(FPC_SEQ));
            pseq->length = hit->data[BSS_QLEN].i;
            pseq->type = ST_DRAFT;
            pseq->name = strdup(hit->data[BSS_MARKER].str);
            insert_sequence(pseq);            
        }

        besname = strdup(hit->data[BSS_BES].str);
        len = strlen(besname);
        assert(len > 0);
        rf = (char)besname[len-1];
       	clonename = strdup(hit->data[BSS_CLONE].str);
        if (fppFind(acedata,clonename,&idx,cloneOrder))
        {
            if (0) //SeqProps.endmatch_check && !is_consistent_hit(pseq->length,hit))
            {
                inconsistent++;
                continue;
            }
        }
        else
        {
            if (badclonecount <= 1000)
            {
                printf("Found hit for unknown clone %s\n",clonename);fflush(0);
            }  
            if (badclonecount == 1000)
            {
                printf("No more will be reported...\n");fflush(0);
            }  
            badclonecount++;   
        }
        add_sequence_hit(pseq,idx,rf,hit->data[BSS_QSTART].i,hit->data[BSS_QEND].i,HT_BSS);
        count++;
        free(besname);
		free(clonename);
    }

    printf("Loaded %d hits\n",count);fflush(0);

    pfiles = (struct file_list*)malloc(sizeof(struct file_list));
    strcpy(pfiles->path,path);
    pfiles->next = 0;
    pfiles->n1 = count;
    pfiles->type = FT_BSS;

    add_to_file_list(pfiles);
    add_seq_track();
}


void init_seq_tree()
{
    init_fpc_tree(&seq_tree);
}
void insert_sequence(FPC_SEQ* pseq)
{
    fpc_tree_insert_node(&seq_tree,(void*)pseq,pseq->name);
    if (strlen(pseq->name) > max_seqsz)
    {
        max_seqsz = strlen(pseq->name);
    }
}

FPC_SEQ* find_sequence(char* name)
{
    FPC_SEQ* pseq;
    pseq = (FPC_SEQ*)fpc_tree_find_data(&seq_tree,name);
    if (!pseq)
    {
        //fprintf(stderr,"Failed to find sequence %s\n",name);
    }   
    return pseq;
}

void destroy_seq_tree()
{
    gchar* m = g_strdup_printf("Remove all sequence data from FPC?");
    int messres = messQuery(m);
    g_free(m);
    if (messres == 0) return;
    
    printf("Removing all sequences\n");fflush(0);

    if (classctg == SEQCLASS)
    {
        if(graphActivate(g2))    
	        graphDestroy();
         
        clear_listroot();
    }
    if(graphActivate(gsequence))
    {
        graphDestroy();
    }
    if(ctg_window!=NULL) gtk_widget_destroy(ctg_window);

    clear_fpc_tree(&seq_tree,free_seq_data);
    init_seq_tree();
    seq_info_files = 0;
}
 

void free_seq_data(void** pdata)
{
    FPC_SEQ* pseq;

    struct remark *rem1, *rem2;
    SEQHIT *h1, *h2;
    
    pseq = (FPC_SEQ*)*pdata;
    rem1 = pseq->remark;
    while (rem1)
    {
        rem2 = rem1;
        rem1 = rem1->next;
        free(rem2);        
    }
    pseq->remark = 0;


    h1 = pseq->hits;
    while (h1)
    {
        h2 = h1;
        h1 = h1->next;  
        remove_seq_hit_from_clone(h2->clone_idx, pseq);      
        free(h2);       
    }
    pseq->hits = 0;

    free(pseq->name);
    pseq->name = 0;
    free(pseq);
    *pdata = 0;
}




void test_seq_tree()
{
    FPC_SEQ* s1 = malloc(sizeof(FPC_SEQ));
    memset(s1,0,sizeof(FPC_SEQ));
    FPC_SEQ* s2 = malloc(sizeof(FPC_SEQ));
    memset(s2,0,sizeof(FPC_SEQ));
    FPC_SEQ* s3 = malloc(sizeof(FPC_SEQ));
    memset(s3,0,sizeof(FPC_SEQ));
    FPC_SEQ* s4 = malloc(sizeof(FPC_SEQ));
    memset(s4,0,sizeof(FPC_SEQ));
    FPC_SEQ* s5 = malloc(sizeof(FPC_SEQ));
    memset(s5,0,sizeof(FPC_SEQ));

    s1->name = strdup("a001");
    s2->name = strdup("a0020");
    s3->name = strdup("b100");
    s4->name = strdup("a002");
    s5->name = strdup("a0021");
    insert_sequence(s1);
    insert_sequence(s2);
    insert_sequence(s3);
    insert_sequence(s4);
    insert_sequence(s5);
    dump_fpc_tree(&seq_tree,0);

    s1 = find_sequence("a0020");
    printf("search %s found %s\n","a0020",s1->name);
    s1 = find_sequence("a001");
    printf("search %s  found %s\n","a001",s1->name);
    s1 = find_sequence("a002");
    printf("search %s  found %s\n","a002",s1->name);
    s1 = find_sequence("a0021");
    printf("search %s  found %s\n","a0021",s1->name);
    s1 = find_sequence("b100");
    printf("search %s  found %s\n","b100",s1->name);

    find_sequence("a0023");
    remove_sequence("b000");
    remove_sequence("a002");
    remove_sequence("a0020");
    remove_sequence("b100");
    dump_fpc_tree(&seq_tree,0);

}

void _dump_seq_tree(FPC_TREE* ptree, int level)
{
    int i;
    SEQHIT* nexthit;
    FPC_SEQ_LIST* nextctg;
    FPC_SEQ* pseq;

    assert(ptree);
    for(i = 0; i < level; i++)
    {
        printf("  ");
    }    


    if (ptree->pdata)
    {
        pseq = (FPC_SEQ*)ptree->pdata;
        printf("%c %d\n",ptree->key,pseq->length);
        nextctg = pseq->ctgs;
        
        while (nextctg)  
        {
            for(i = 0; i < level+1; i++)
            {
                printf("  ");
            } 
            printf("Ctg:%s %d, %d\n",nextctg->seq->name,nextctg->seq->pos,nextctg->seq->length);
            nextctg = nextctg->next;
        }        
        nexthit = pseq->hits;
        while (nexthit)  
        {
            for(i = 0; i < level+1; i++)
            {
                printf("  ");
            } 
            printf("Hit:%s\n",arrp(acedata,nexthit->clone_idx,CLONE)->clone);
            nexthit = nexthit->next;
        }
    }
    else
    {
        printf("%c\n",ptree->key);
    }
    if (ptree->child != 0)
    {
        _dump_seq_tree(ptree->child,level+1);        
    }
    if (ptree->sibling != 0)
    {
        _dump_seq_tree(ptree->sibling,level);
    } 
}

void dump_seq_tree()
{
//    traverse_seq_tree();
//    return;

    printf("Sequence data tree:\n");fflush(0);
    _dump_seq_tree(&seq_tree,0);
}

void traverse_seq_tree()
{
    FPC_STACK stack;
    FPC_TREE* pnode;
    FPC_SEQ* pseq;

    fpc_stack_init(&stack);
    pnode = &seq_tree;
    while (pnode)
    {
        printf("%c\n",pnode->key);fflush(0);
        if (pnode->pdata)
        {
            pseq = (FPC_SEQ*)pnode->pdata;
            printf("\t%s\n",pseq->name);fflush(0);           
        }
        pnode = fpc_tree_next_node(pnode, &stack);
    }  
}


void seq_actions(int box, float x, float y)
{
    struct seqdisplist* sd;
    int redisplay = 0;
    for (sd = sdlist; sd; sd = sd->next)
    {
        if (sd->box == box)
        {
            switch (sd->type)
            {
                case CTG:
                    centreSequence = sd->seq;
                    centre_pos = sd->pos;
                    ctgdisplay(sd->ctg);
                    gdk_window_raise(ctg_window->window);
                    break;
                case EXPCTGS:
                    expand_ctgs = !expand_ctgs;
                    redisplay = 1;
                    break;
                case EXPSEQCTGS:
                    expand_seqctgs = !expand_seqctgs;
                    redisplay = 1;
                    break;
                case EXPPLACE:
                    expand_place = !expand_place;
                    redisplay = 1;
                    break;
            }
        }
    }   
    if (redisplay) displaysequence(seqdisplayed);
}


void add_seq_action_box(int ibox, seqbox_types type, int ctg, FPC_SEQ* pseq, int pos)
{
    struct seqdisplist* sd;

    sd = (struct seqdisplist*)malloc(sizeof(struct seqdisplist));
    memset(sd,0,sizeof(struct seqdisplist));  
    sd->box = ibox;
    sd->ctg = ctg;
    sd->type = type;
    sd->next = sdlist;
    sd->seq = pseq;
    sd->pos = pos;
    sdlist = sd;
}

void displaysequence(FPC_SEQ* pseq)
{
    float line;
    char text[256];
    FPC_SEQ_LIST* pctgs;
    FPC_SEQ* pctg;
    SEQHIT* phit;
    int nctgs;
    CLONE* clp;
    char type[100];
    struct seqctgpos* ctgpos;
    struct seqdisplist* sdl1, *sdl2;
    int ibox;
    int nplace; 
    int* ctgcounts;
    int i;
    int nctghit;

    assert(pseq);

    sdl1 = sdlist;
    while (sdl1)
    {
        sdl2 = sdl1;
        sdl1 = sdl1->next;
        memset(sdl2,0,sizeof(struct seqdisplist));
        free(sdl2);  
    }
    sdlist = 0;
    
    if (seqdisplayed != pseq)
    {
        expand_ctgs = 0;
        expand_seqctgs = 0;
        expand_place = 0;
    }
    seqdisplayed = pseq;

    if(graphActivate(gsequence))
    {   
        graphClear();
        if (seqdisplayed != pseq)
        {     
            graphPop();     
        }
    }
    else
    { 
        gsequence = graphCreate (TEXT_SCROLL,"Sequence",.2,.2,.26,.3) ;
        graphRegister(PICK, seq_actions);
    }

    switch (pseq->type)
    {
        case ST_DRAFT:
            strcpy(type,"Draft");
            break;
        case ST_CLONE:
            strcpy(type,"BAC");
            break;
    }

    line = 1.0;
    sprintf(text,"%s %s",pseq->name,type);
    graphText(text,1.0,line);
    line += 1.0;
    sprintf(text,"%d bp",pseq->length);
    graphText(text,1.0,line);

    nctgs = 0;
    pctgs = pseq->ctgs;
    while (pctgs)
    {
        nctgs++;
        pctgs = pctgs->next;
    }
    if (nctgs > 0)
    {
        line += 2.0;
        sprintf(text, "SeqCtgs: %d", nctgs);
        ibox = graphBoxStart();
        graphTextFormat(BOLD);
        graphText(text, 1.0, line);
        graphBoxEnd();
        graphTextFormat(PLAIN_FORMAT);
        add_seq_action_box(ibox,EXPSEQCTGS,0, 0,0);

        if (expand_seqctgs)
        {
            for (pctgs = pseq->ctgs; pctgs; pctgs = pctgs->next)
            {
                sprintf(text, "%s (%d bp)", pctgs->seq->name, pctgs->seq->length);
                line += 1.0;
                graphText(text,2.0,line);
            }
        }
    }

    for(ctgpos = pseq->ctgpos, nplace = 0; ctgpos; ctgpos = ctgpos->next, nplace++);
    
    if (nplace > 0)
    {
        line += 2.0;
        sprintf(text,"Placements: %d", nplace);
        if (nplace <= 5)
        {
            graphText(text,1.0,line);
            expand_place = 1;
        }
        else
        {
            ibox = graphBoxStart();
            graphTextFormat(BOLD);
            graphText(text, 1.0, line);
            graphBoxEnd();
            graphTextFormat(PLAIN_FORMAT);
            add_seq_action_box(ibox,EXPPLACE,0, 0,0);
        }
        if (expand_place)
        {
            for (ctgpos = pseq->ctgpos; ctgpos; ctgpos = ctgpos->next)
            {
                ibox = graphBoxStart();
                line += 1.0;
                sprintf(text, "Ctg%d", ctgpos->ctg);
                graphTextFormat(BOLD);
                graphText(text, 3.0, line);
                graphTextFormat(PLAIN_FORMAT);
                sprintf(text, "%d hits", ctgpos->score);
                graphText(text, 11.0, line);

                sprintf(text, "%d-%d kb", ctgpos->seq_start/1000,ctgpos->seq_end/1000);
                graphText(text, 21.0, line);

                graphBoxEnd();
                add_seq_action_box(ibox,CTG,ctgpos->ctg, pseq, ctgpos->pos);
            }
        }
    }

    /* collect the hits by contig */

    ctgcounts = (int*)malloc((max_contig+1)*sizeof(int));
    for(i = 0; i <= max_contig; i++)
    {
        ctgcounts[i] = 0;
    }

    phit = pseq->hits;
    while (phit)
    {
        clp = arrp(acedata,phit->clone_idx,CLONE);
        ctgcounts[clp->ctg]++;
        phit = phit->next;
    }
    pctgs = pseq->ctgs;
    while (pctgs)
    {
        pctg = pctgs->seq;
        phit = pctg->hits;
        while (phit)
        {
            clp = arrp(acedata,phit->clone_idx,CLONE);
            ctgcounts[clp->ctg]++;
            phit = phit->next;
        }        
        pctgs = pctgs->next;
    }
    nctghit = 0;
    for(i = 0; i <= max_contig; i++)
    {
        if (ctgcounts[i] > 0)
        {
            nctghit++;
        }
    }
    
    if (nctghit > 0)
    {
        line += 2.0;
        sprintf(text,"All Ctgs Hit (%d)",nctghit);
        ibox = graphBoxStart();
        graphTextFormat(BOLD);
        graphText(text, 1.0, line);
        graphBoxEnd();
        graphTextFormat(PLAIN_FORMAT);
        add_seq_action_box(ibox,EXPCTGS,0, 0,0);    

        if (expand_ctgs)
        {
            for(i = 0; i <= max_contig; i++)
            {
                if (ctgcounts[i] > 0)
                {
                    line += 1.0;
                    sprintf(text,"Ctg%d",i);
                    graphText(text, 3.0, line);
                    sprintf(text,"%d hits",ctgcounts[i]);
                    graphText(text, 11.0, line);
                }
            }        
        }    
    }

    graphTextBounds(6, line+5);
    graphRedraw();
}

int seq_count_contig_hits(FPC_SEQ* pseq, int ctg)
{
    int ret = 0;
    FPC_SEQ_LIST* pctgs;
    SEQHIT* phit;
    CLONE* clp;
    int clones[100];
    int i;
    int found;

    assert(pseq);

    for (i = 0; i < 100; i++) clones[i] = 0;

    phit = pseq->hits;
    while (phit)
    {
        clp = arrp(acedata,phit->clone_idx,CLONE);
        if (clp->ctg == ctg)
        {
            found = 0;
            for (i = 0; i < ret; i++)
            {
                if (clones[i] == phit->clone_idx)
                {
                    found = 1;
                }
            }
            if (found == 0)
            {
                clones[ret] = phit->clone_idx;
                ret++;
            }
        }
        phit = phit->next;
    }
    pctgs = pseq->ctgs;
    while (pctgs)
    {
        phit = pctgs->seq->hits;
        while (phit)
        {
            clp = arrp(acedata,phit->clone_idx,CLONE);
            if (clp->ctg == ctg)
            {
                found = 0;
                for (i = 0; i < ret; i++)
                {
                    if (clones[i] == phit->clone_idx)
                    {
                        found = 1;
                    }
                }
                if (found == 0)
                {
                    clones[ret] = phit->clone_idx;
                    ret++;
                }
            }
            phit = phit->next;
        }
        pctgs = pctgs->next;
    }
    return ret;
}

void compute_seq_location(FPC_SEQ* pseq,struct seqctgpos* ctgpos,int page_left, int page_right, int *pleft, int *pright) 
{
    assert(pseq);
    assert(ctgpos);
    assert(pleft);
    assert(pright);
    assert(page_right > page_left);

    *pleft = ctgpos->ctg_start;       
    *pright = ctgpos->ctg_end;       
    
/*
    seqsize = (int)( pseq->length/Proj.avgbandsize);
    *pleft = MaX((int)(ctgpos->pos - .5*seqsize),0);
    *pright = MaX((int)(ctgpos->pos + .5*seqsize),0);
    if (ctgpos->pos >= page_left && ctgpos->pos <= page_right)
    {
        // seq should show on this page so make sure it fits
        if (*pright >= page_right)
        {
            *pright = page_right - 10;
        }
        if (*pleft <= page_left)
        {
            *pleft = page_left + 10;
        }
    }    
*/
}


struct temphitlist
{
    int ctg;
    int pos;
    int seq_pos;
    int seq_start;
    int seq_end;
    int clone_idx;
    char rf;
    FPC_SEQ* seq;
    struct temphitlist* next;
};

void insert_temp_hit(struct temphitlist** plist, struct temphitlist* pnew)
{
    struct temphitlist* p;

    assert(pnew);

    /* do we want to put the new one first? */
    if (!*plist || (*plist)->ctg > pnew->ctg ||
            ((*plist)->ctg == pnew->ctg && (*plist)->pos >= pnew->pos))
    {
        pnew->next = *plist;
        *plist = pnew; 
        return;   
    }
    /* p will be the node after which we want to add the new one */
    p = *plist;
    while (p && p->next)
    {
        if (p->next->ctg > pnew->ctg) break;
        if (p->next->ctg == pnew->ctg && p->next->pos > pnew->pos) break;
        p = p->next;        
    }
    pnew->next = p->next;
    p->next = pnew;
}

/* compute correlation coeff for sequence to contig hits */
float seq_hit_corr(struct temphitlist* t1, struct temphitlist* t2)
{
    double x, y;
    double x_avg = 0, y_avg = 0, xy_avg = 0, N = 0, xx_avg = 0, yy_avg = 0;
    double num, r, sdx, sdy, den;
    struct temphitlist *p;

    assert(t1);
    assert(t2);
    
    for(p = t1; p != t2->next; p = p->next)
    {
        N++;
        x = (double)p->pos;
        y = (double)p->seq_pos;
        x_avg += x;
        y_avg += y;
        xy_avg += x*y;
        xx_avg += x*x;
        yy_avg += y*y;
    } 
    if (N < 2)
    {
        return 1;
    }   
    x_avg /= N;
    y_avg /= N;
    xy_avg /= N;
    xx_avg /= N;
    yy_avg /= N;
    
    num = (xy_avg - x_avg*y_avg);
    sdx = sqrt(xx_avg - x_avg*x_avg);
    sdy = sqrt(yy_avg - y_avg*y_avg);
    den = sdx*sdy;

    r = 0;
    if (den != 0)
    {
        r = num/den;
    }

    return (float)r;
}

/* compute correlation coeff for sequence to contig hits */
float hit_correlation(GArray* a)
{
    double x, y;
    double x_avg = 0, y_avg = 0, xy_avg = 0, N = 0, xx_avg = 0, yy_avg = 0;
    double num, r, sdx, sdy, den;
    struct temphitlist *p;
    int i;

    for (i = 0; i < a->len; i++)
    {
        p = g_array_index(a,struct temphitlist*,i);

        N++;
        x = (double)p->pos;
        y = (double)p->seq_pos;
        x_avg += x;
        y_avg += y;
        xy_avg += x*y;
        xx_avg += x*x;
        yy_avg += y*y;
    } 
    if (N < 2)
    {
        return 1;
    }   
    x_avg /= N;
    y_avg /= N;
    xy_avg /= N;
    xx_avg /= N;
    yy_avg /= N;
    
    num = (xy_avg - x_avg*y_avg);
    sdx = sqrt(xx_avg - x_avg*x_avg);
    sdy = sqrt(yy_avg - y_avg*y_avg);
    den = sdx*sdy;

    r = 0;
    if (den != 0)
    {
        r = num/den;
    }

    return (float)r;
}
char is_at_end(int ctg, int ctg1, int ctg2)
{
    if (ctg2 >= contigs[ctg].right - Pz.fromendInt) // factor of 2 since hit pos is taken as middle of clone
    {
        return 'R';
    }  
    else if (ctg1 <= contigs[ctg].left + Pz.fromendInt)
    {
        return 'L';
    }
    return 0;
}

/* Find alignment(s) for ctg to seq 
    Method: 
        Slide window along ctg
        Foreach ctg win with 10 hits
            Sort hits by seq location
            Slide window along seq
            If (hits > 10)
                Add to window pairs

        Merge window pairs

*/

typedef struct _winpair
{
    int ctg1, ctg2;
    int seq1, seq2;
} winpair;

typedef struct _seq_align
{
    int seq_start;
    int seq_end;
    int ctg;
    int ctg_start;
    int ctg_end;
    int nhits;
    float corr;
}
seq_align;


// This function assumes that pl1 starts to the left of pl2 within the contig
int placements_overlap(struct seqctgpos* pl1, struct seqctgpos* pl2, char* prl1, char* prl2, int* polap)
{
    int s1, e1, s2, e2, ret;    

    s1 = pl1->ctg_start;
    e1 = pl1->ctg_end;
    s2 = pl2->ctg_start;
    e2 = pl2->ctg_end;

    ret = 0;
    *polap = -(s2 - e1);
    if (*polap >= -Pz.fromendInt)
    {
        ret = 1;

        // Have to figure out whether the R/L end of the sequences is involved

        // seq1 is the one on the left
        *prl1 = ' ';
        if (pl1->corr >= 0)
        {
            if (pl1->pseq->length - pl1->seq_end <= Pz.seq_fromendInt*1000)
            {
                *prl1 = 'R';
            }

        }
        else 
        {
            if (pl1->seq_start <= Pz.seq_fromendInt*1000)
            {
                *prl1 = 'L';
            }
        }

        // seq2 is the one on the right
        *prl2 = ' ';
        if (pl2->corr >= 0)
        {
            if (pl2->seq_start <= Pz.seq_fromendInt*1000)
            {
                *prl2 = 'L';
            }
        
       }
        else 
        {
            if (pl2->pseq->length - pl2->seq_end <= Pz.seq_fromendInt*1000)
            {
                *prl2 = 'R';
            }
 
        }
        
    }    


    return ret;
}
int intervals_overlap(int x,int y,int a,int b)
{
    assert(x <= y);
    assert(a <= b);

    if ( (a <= x && x <= b) ||
         (a <= y && y <= b) ||
         (x <= a && a <= y) ||
        ( x <= b && b <= y) )
    {
        return 1;
    }
    return 0;
}
int winpairs_overlap(winpair* w1, winpair* w2)
{
    if (intervals_overlap(w1->ctg1,w1->ctg2,w2->ctg1,w2->ctg2) &&
        intervals_overlap(w1->seq1,w1->seq2,w2->seq1,w2->seq2) )
    {
        return 1;
    }
    return 0;
}
void count_window_hits(seq_align* sa, struct temphitlist* phits, int* pcount, float* pcorr)
{
    struct temphitlist* ph;
    int count =0;
    GArray* whits;
    
    whits = g_array_new(0,0,sizeof(struct temphitlist*));
    *pcorr = 0;
    for (ph = phits; ph; ph = ph->next)
    {
        if (sa->seq_start <= ph->seq_pos && ph->seq_pos <= sa->seq_end 
                && sa->ctg_start <= ph->pos && ph->pos <= sa->ctg_end)
        {
            count++;
            g_array_append_val(whits,ph);
        }
    }
    *pcorr = hit_correlation(whits);
    *pcount = count;
    g_array_free(whits,TRUE); 
}
/* using the given range of hits, walk on the seq side and find seq windows */

int seq_side_sort(const void* a, const void* b)
{
    struct temphitlist* t1 = *(struct temphitlist**)a;
    struct temphitlist* t2 = *(struct temphitlist**)b;

    if (t1->seq_pos < t2->seq_pos) return -1;
    else if (t1->seq_pos > t2->seq_pos) return 1;
    return 0;
}

void seq_walk(struct temphitlist* t1, struct temphitlist* t2, GArray* pairs)
{
    GArray* tha;
    struct temphitlist* t, *tstart,*tend,*tnext;
    int istart, iend, i, j;
    winpair pair, *wp1;
    int nhits;
    int minstep;
    int window_size;
    int count = 0;
    CLONE* clp1, *clp2;
    GArray* new_pairs;
    int found = 0;

    new_pairs = g_array_new(0,0,sizeof(winpair));

    window_size = SeqProps.window_size*1000;
    minstep = window_size/10;

    /* first re-sort the hits by sequence location */

    tha = g_array_new(0,0,sizeof(struct temphitlist*));
    for (t = t1; t != t2->next ; t = t->next)
    {
        g_array_append_val(tha,t);
        count++;
    }    
    g_array_sort(tha,seq_side_sort);


  /* now slide the window on the seq side */
    istart = 0;
    iend = istart;
    nhits = 1;
    while (iend < tha->len)
    {
        tstart = g_array_index(tha,struct temphitlist*,istart);
        tend = g_array_index(tha,struct temphitlist*,iend);
        /* advance the end pointer to the max range */
        while (iend < tha->len -1)
        {
            tnext = g_array_index(tha,struct temphitlist*,iend+1);
            if (tnext->seq_pos - tstart->seq_pos <= window_size )
            {
                tend = tnext;
                iend++;
                nhits++;
            } 
            else
            {
                break;
            }
        }
        if (nhits >= SeqProps.min_clones)
        {
            /* we've been using midpoints but for the window coords we will use
                endpoints, otherwise it can easily fail fromEnd tests when it
                really does reach the end of the contig */

            clp1 = arrp(acedata,tstart->clone_idx,CLONE);
            clp2 = arrp(acedata,tend->clone_idx,CLONE);
            pair.ctg1 = MiN(clp1->x,clp2->y);
            pair.ctg2 = MaX(clp1->x,clp2->y);

            for (i = istart; i <= iend; i++)
            {
                t = g_array_index(tha,struct temphitlist*,i);
                clp1 = arrp(acedata,t->clone_idx,CLONE);
                pair.ctg1 = MiN(pair.ctg1,clp1->x);
                pair.ctg2 = MaX(pair.ctg2,clp1->y);
            }
            pair.seq1 = tstart->seq_start;    
            pair.seq2 = tend->seq_end;

            /* Here, since we already restricted to a window on the ctg side, it's safe
             to greedily merge pairs instead of using transitive closure. Merging 
             at this stage helps keep the input to the transitive closure under control. 
            */

            found = 0;
            for (j = 0; j < new_pairs->len; j++)
            {
                wp1 = &g_array_index(new_pairs,winpair,j);
                if (winpairs_overlap(&pair,wp1))
                {
                    //printf("merge winpair: ctg(%d,%d) seq(%d,%d)\n",pair.ctg1,pair.ctg2,pair.seq1,pair.seq2);fflush(0);                   
                    wp1->ctg1 = MiN(pair.ctg1,wp1->ctg1);
                    wp1->seq1 = MiN(pair.seq1,wp1->seq1);
                    wp1->ctg2 = MaX(pair.ctg2,wp1->ctg2);
                    wp1->seq2 = MaX(pair.seq2,wp1->seq2);
                    found = 1; 
                    //printf("merged winpair: ctg(%d,%d) seq(%d,%d)\n",wp1->ctg1,wp1->ctg2,wp1->seq1,wp1->seq2);fflush(0);                   
                }
                if (found) break;
            }
            if (!found)
            {
                g_array_append_val(new_pairs,pair);
                //printf("new winpair: ctg(%d,%d) seq(%d,%d)\n",pair.ctg1,pair.ctg2,pair.seq1,pair.seq2);fflush(0);
            }
        }
        if (istart < tha->len-1)
        {
            /* advance the start pointer and continue */
            istart++;
            nhits--;
            while ( istart < iend 
                        && g_array_index(tha,struct temphitlist*,istart+1)->seq_pos - tstart->seq_pos < minstep)
            {
                istart++;
                nhits--;
            }

        }
        else 
        {
            break;
        }
    }
    g_array_free(tha,TRUE);

    for (j = 0; j < new_pairs->len; j++)
    {
        g_array_append_val(pairs,g_array_index(new_pairs,winpair,j));
    }

    g_array_free(new_pairs,TRUE);
}


int int_sort (const void* pa, const void* pb)
{
    int a = *(int*)pa;
    int b = *(int*)pb;

    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
}


/* increment this BES hit count and return 1 if it was previously zero 
	purpose is to only count once a BES which may hit multiple times in
	the hit window
*/
int add_clone_hit(struct temphitlist* t, int* l, int* r)
{
    int cidx = t->clone_idx;
    char rf = t->rf;
    int* clones = (rf == 'r' ? r : l);
    int ret = (clones[cidx] > 0 ? 0 : 1);
    clones[cidx]++;
    return ret;
}
/* remove this BES hit and return 1 if its counter is now zero */
int remove_clone_hit(struct temphitlist* t, int* l, int* r)
{
    int cidx = t->clone_idx;
    char rf = t->rf;
    int* clones = (rf == 'r' ? r : l);
    assert(clones[cidx] >= 1);
    int ret = (clones[cidx] == 1 ? 1 : 0);
    clones[cidx]--;
    return ret;
}

void pair_align(int ctg, FPC_SEQ* pseq, GArray* seq_aligns)
{
    SEQHIT* phits;
    struct temphitlist *thits, *newthit, *th1, *th2, *thnext;
    CLONE* clp;
    FPC_SEQ_LIST* pctgs;
    FPC_SEQ* pctg;
    int nhits;
    int window_size;
    int i,j, k,j1,k1;
    assert(pseq);
    GArray* winpairs;
    seq_align sa;
    int min_hits;
    winpair wp1,wp2;
    int minstep;
    int multi;
    char* adj;
    short* adj_sparse;
    short* adj_len;
    short npairs;
    int* clones_r, *clones_f;
    int ctgsize;

    winpairs = g_array_new(0,0,sizeof(winpair));

    //  if (ctg != 4) return;

    /* list the hits between these two, sorted by contig position */
    nhits = 1;
    thits = 0;
    phits = pseq->hits;
    while (phits)
    {
        clp = arrp(acedata,phits->clone_idx,CLONE);
        if (clp->ctg == ctg)
        {
            newthit = (struct temphitlist*)malloc(sizeof(struct temphitlist));  
            newthit->ctg = clp->ctg;
            newthit->pos = (int)(.5*(clp->x + clp->y));
            newthit->seq_pos = (int)(.5*(phits->seq_start + phits->seq_end));
            newthit->seq_start = phits->seq_start;
            newthit->seq_end = phits->seq_end;
            newthit->seq = pseq;
            newthit->clone_idx = phits->clone_idx;
            newthit->rf = phits->rf;
            insert_temp_hit(&thits, newthit);
            nhits++;
       }
        phits = phits->next;
    }
    pctgs = pseq->ctgs;
    while (pctgs)
    {
        pctg = pctgs->seq;
        phits = pctg->hits;
        while (phits)
        {
            clp = arrp(acedata,phits->clone_idx,CLONE);
            if (clp->ctg == ctg)
            {
                newthit = (struct temphitlist*)malloc(sizeof(struct temphitlist));  
                newthit->ctg = clp->ctg;
                newthit->pos = (int)(.5*(clp->x + clp->y));
                newthit->seq_pos = pctg->pos + (int)(.5*(phits->seq_start + phits->seq_end));
                newthit->seq_start = pctg->pos + phits->seq_start;
                newthit->seq_end = pctg->pos + phits->seq_end;
                newthit->seq = pctg;
                newthit->clone_idx = phits->clone_idx;
                newthit->rf = phits->rf;
                insert_temp_hit(&thits, newthit);
                nhits++;
            }
            phits = phits->next;
        }        
        pctgs = pctgs->next;
    }

    min_hits = (SeqProps.min_clones >= 1 ? SeqProps.min_clones : 1);

    /* Slide the window and find the clusters. For each one, slide second window on seq side. */
    /* We use a minimum step to keep the number of windows under control. */

    window_size = (int)((SeqProps.window_size*1000)/Proj.avgbandsize);  // in cb units
    ctgsize = contigs[ctg].right - contigs[ctg].left;
    minstep = MaX(window_size/10,ctgsize/1000);
    
    /* need arrays to count unique BES hits */

    clones_r = malloc(arrayMax(acedata)*sizeof(int));
    clones_f = malloc(arrayMax(acedata)*sizeof(int));
    memset(clones_r,0,arrayMax(acedata)*sizeof(int));
    memset(clones_f,0,arrayMax(acedata)*sizeof(int));

    th1 = thits;
    nhits = 1;
    th2 = th1;
    add_clone_hit(th1,clones_r,clones_f);

    while (1)
    {
        /* advance the back pointer by at least one, if possible */
        if (th2->next) 
        {
            th2 = th2->next;
            if (add_clone_hit(th2,clones_r,clones_f))
            {
                nhits++;
            }
        }

        /* Advance the front pointer now to get the window back in range */
        while (th1 != th2)
        {
            if (th1->pos < th2->pos - window_size)
            {
                if (remove_clone_hit(th1,clones_r,clones_f))
                {
                    nhits--;
                }
                th1 = th1->next;
            }
            else break;
        }

        /* now advance the back pointer as far as possible */

        while (1)
        {

            /* Look ahead and make sure we're not at the end of the
                    acceptable range. */
            if (th2->next && th2->next->ctg == th1->ctg
                    && th2->next->pos <= th1->pos + window_size)
            {
                th2 = th2->next; 
                if (add_clone_hit(th2,clones_r,clones_f))
                {
                    nhits++;
                }
            }
            else
            {
                break;
            } 
        } 
        if (nhits >= min_hits)
        {
            /* This window has enough. Walk on the seq side */

            //printf("ctgwin:seq %s ctg %d start %d, end %d, hits %d\n",pseq->name,ctg,th1->pos,th2->pos,nhits);fflush(0);
 
            seq_walk(th1,th2,winpairs);

            //printf("ctgwin:start %d, end %d, hits %d, %d winpairs\n",th1->pos,th2->pos,nhits,winpairs->len);fflush(0);
           
        }

        /* advance the front pointer by at least one and by a maximum of minstep */        
        thnext = th1->next;
        if (!thnext) break;

        if (remove_clone_hit(th1,clones_r,clones_f))
        {
            nhits--;
        }

        while (thnext != th2 && thnext->next && thnext->next->pos - th1->pos < minstep )
        {
            if (remove_clone_hit(thnext,clones_r,clones_f))
            {
                nhits--;
            }
            thnext = thnext->next;
        }
        th1 = thnext;
    
    }
    free(clones_f);
    free(clones_r);

    /* 
        Make adjacency matrix and do transitive closure, with auxiliary arrays for speed. 
        If the number of pairs won't fit in a short, it's too many to handle.
    */
    npairs = (short)winpairs->len;

    if (npairs != winpairs->len)
    {
        printf("Alignment of sequence %s to contig %d - too many pairs (%d)\n",pseq->name,ctg,winpairs->len);fflush(0);
        return;
    }

    adj = (char*)malloc(npairs*npairs*sizeof(char));
    if (!adj)
    {
        printf("unable to alloc memory for alignment of sequence %s to contig %d\n",pseq->name,ctg);fflush(0);
        return;
    }
    adj_sparse = (short*)malloc(npairs*npairs*sizeof(short));
    if (!adj_sparse)
    {
        printf("unable to alloc memory for alignment of sequence %s to contig %d\n",pseq->name,ctg);fflush(0);
        return;
    }
    adj_len = (short*)malloc(npairs*sizeof(short));
    if (!adj_len)
    {
        printf("unable to alloc memory for alignment of sequence %s to contig %d\n",pseq->name,ctg);fflush(0);
        return;
    }

    memset(adj,0,npairs*npairs*sizeof(char));               /* adjacency matrix */
    memset(adj_sparse,0,npairs*npairs*sizeof(short));       /* adjacency hint matrix; like a sparse, but a superset of the adjacency */ 
    memset(adj_len,0,npairs*sizeof(short));                 /* length of adjacency hint array */

    for (i = 0; i < npairs; i++)
    {
        /* link to itself; we really only need the first line so we can tell 
            a singleton from a removed link */
        adj[i + i*npairs] = 1;
        adj_sparse[adj_len[i] + i*npairs] = (short)i;
        adj_len[i]++; 

        wp1 = g_array_index(winpairs,winpair,i);
        //printf("window:ctg %d-%d seq %d-%d\n",wp1.ctg1,wp1.ctg2,wp1.seq1,wp1.seq2);fflush(0);
        for (j = i+1; j < npairs; j++)
        {
            wp2 = g_array_index(winpairs,winpair,j);
            if (winpairs_overlap(&wp1,&wp2))
            {
                adj[j + i*npairs] = 1;
                adj_sparse[adj_len[i] + i*npairs] = (short)j;
                adj_len[i]++;

                adj[i + j*npairs] = 1;
                adj_sparse[adj_len[j] + j*npairs] = (short)i;
                adj_len[j]++;

                //printf("olap: %d:(%d %d %d %d) %d:(%d %d %d %d)\n",i,wp1.ctg1,wp1.ctg2,wp1.seq1,wp1.seq2,
                //                                                j,wp2.ctg1,wp2.ctg2,wp2.seq1,wp2.seq2);fflush(0);

            }
            else
            {
                //printf("no olap: %d:(%d %d %d %d) %d:(%d %d %d %d)\n",i,wp1.ctg1,wp1.ctg2,wp1.seq1,wp1.seq2,
                //	                                                j,wp2.ctg1,wp2.ctg2,wp2.seq1,wp2.seq2);fflush(0);
            }
        }

    }
   /* Compute the transitive closure of the pairs, by collapsing triads (i,j),(j,k) to (i,j),(i,k) .*/    

    for (i = 0; i < npairs; i++)
    {
        for (j1 = 0; j1 < adj_len[i]; j1++)
        {
            j = adj_sparse[j1 + i*npairs];
            if (j == i) continue;
            if (0 == adj[j + i*npairs]) continue;

            for (k1 = 0; k1 < adj_len[j]; k1++)
            {
                k = adj_sparse[k1 + j*npairs];
                if (k == j) continue;
                if (k == i) continue;
                if (0 == adj[k + j*npairs]) continue; // the link j,k was previously broken
                if (1 == adj[k + i*npairs]) continue; // i,k already exists
                
                /* Add link i,k  */
                adj[k + i*npairs] = 1;
                adj_sparse[adj_len[i] + i*npairs] = k;
                adj_len[i]++;

                /* Add link k,i  */
                adj[i + k*npairs] = 1;
                adj_sparse[adj_len[k] + k*npairs] = i;
                adj_len[k]++;
                
            }
            /* remove all j node links. j never needs to be considered again. */
            adj_len[j] = 0;
            for (k1 = 0; k1 < npairs; k1++)
            {
                adj[k1 + j*npairs] = 0;
                adj[j + k1*npairs] = 0;
            }
        } 

    }



    /* finish the clusters */

    multi = 0;
    for (i = 0; i < npairs; i++)
    {
        //printf("pair %d cluster size %d\n",i,adj_len[i]);fflush(0);
        if (adj_len[i] == 0) continue;  
        multi++;

        wp1 = g_array_index(winpairs,winpair,i);
        sa.ctg = ctg;
        sa.ctg_start = wp1.ctg1;
        sa.ctg_end = wp1.ctg2;
        sa.seq_start = wp1.seq1;
        sa.seq_end = wp1.seq2;

        for (j1 = 0; j1 < adj_len[i]; j1++)
        {
            j = adj_sparse[j1 + i*npairs];
            wp1 = g_array_index(winpairs,winpair,j);
            sa.ctg_start = MiN(sa.ctg_start,wp1.ctg1);
            sa.seq_start = MiN(sa.seq_start,wp1.seq1);
            sa.ctg_end = MaX(sa.ctg_end,wp1.ctg2);
            sa.seq_end = MaX(sa.seq_end,wp1.seq2);

        }
        count_window_hits(&sa,thits,&sa.nhits,&sa.corr);
        if (sa.nhits >= min_hits)
        {
            //printf("PLACE: seq %s (%d - %d), ctg %d (%d - %d) corr %lf\n",pseq->name,sa.seq_start,sa.seq_end,ctg,sa.ctg_start,sa.ctg_end,sa.corr);fflush(0);
            g_array_append_val(seq_aligns,sa);
        }
        
    }

    free(adj);
    free(adj_sparse);
    free(adj_len);

    g_array_free(winpairs,TRUE);


    th1 = thits;
    while (th1)
    {
        th2 = th1;
        th1 = th1->next;
        free(th2);
    }
    thits = 0;

}

int seq_align_sort(const void* a, const void* b)
{
    seq_align* p1 = (seq_align*)a;
    seq_align* p2 = (seq_align*)b;

    if (p1->nhits > p2->nhits) return -1;
    else if (p1->nhits < p2->nhits) return 1;
    else return 0;
}
void place_sequence(FPC_SEQ* pseq)
{
    struct seqctgpos *scp, *scp1, *ploc;
    SEQHIT *phits;
    struct temphitlist *thits;
    CLONE* clp;
    int i;
    int* ctg_counts;
    GArray* seq_aligns;
    int topn;
    seq_align* psa;
    char msg[1000];
    struct fpc_seq_list *sctg;  

    seq_aligns = g_array_new(0,0,sizeof(seq_align));

    assert(pseq);

    printf ("Place %s           \r",pseq->name);fflush(0);

    scp = pseq->ctgpos;
    while (scp)
    {
        scp1 = scp;
        scp = scp->next;
        free(scp1);                
    } 
    pseq->ctgpos = 0;     

    /* pass1 : find all candidate contigs */

    ctg_counts = (int*)malloc( (1+max_contig)*sizeof(int));
    memset(ctg_counts,0,(1+max_contig)*sizeof(int));

    thits = 0;
    phits = pseq->hits;
    while (phits)
    {
        clp = arrp(acedata,phits->clone_idx,CLONE);
        if (clp->ctg != 0)
        {
            if (1) //contigs[clp->ctg].ctgstat != DEAD && contigs[clp->ctg].ctgstat != AVOID)
            {
                ctg_counts[clp->ctg]++;
            }
        }
        phits = phits->next;
    }
    /* don't forget the seqctgs */
    for (sctg = pseq->ctgs; sctg; sctg = sctg->next)
    {
        for (phits = sctg->seq->hits; phits; phits = phits->next)
        {
            clp = arrp(acedata,phits->clone_idx,CLONE);
            if (clp->ctg != 0)
            {
                if (1) //contigs[clp->ctg].ctgstat != DEAD && contigs[clp->ctg].ctgstat != AVOID)
                {
                    ctg_counts[clp->ctg]++;
                }
            }
        }
    }

    /* pass2 : detailed alignment for each pair */

    for (i = 1; i <= max_contig; i++)
    {
        //printf( "ctg%d: %d hits\n",i,ctg_counts[i]);fflush(0);
        if (ctg_counts[i] >= SeqProps.min_clones)
        {
            pair_align(i,pseq,seq_aligns);
        }
    }

    free(ctg_counts);

    /* Sort and add the top n */

    g_array_sort(seq_aligns,seq_align_sort);

    topn = MiN(seq_aligns->len,(SeqProps.top_n > 0 ? SeqProps.top_n : seq_aligns->len));
    for (i = 0; i < topn; i++)
    {
        psa = &g_array_index(seq_aligns,seq_align,i);
        ploc = (struct seqctgpos*)malloc(sizeof(struct seqctgpos));
        memset(ploc,0,sizeof(struct seqctgpos));
        ploc->score = psa->nhits;
        ploc->pos = (int)(.5*(psa->ctg_start + psa->ctg_end));;
        ploc->ctg = psa->ctg;
        ploc->seq_start = psa->seq_start;
        ploc->seq_end = psa->seq_end;
        ploc->ctg_start = psa->ctg_start;
        ploc->ctg_end = psa->ctg_end;
        ploc->LR = is_at_end(psa->ctg,psa->ctg_start,psa->ctg_end);
        ploc->corr = psa->corr;
        ploc->pseq = pseq;  

        /* insert it in order of seq_start */
        if (!pseq->ctgpos || pseq->ctgpos->seq_start > ploc->seq_start)
        {
            ploc->next =  pseq->ctgpos;
            pseq->ctgpos = ploc;
        }
        else
        {
            for (scp = pseq->ctgpos; scp->next && scp->next->seq_start < ploc->seq_start; scp = scp->next);
            ploc->next = scp->next;
            scp->next = ploc;
        }
        contigs[psa->ctg].draft++;

        if (ploc->corr >= 0)
        {
            sprintf(msg," SEQ %s (%d hits); ",pseq->name,ploc->score);
        }
        else
        {
            sprintf(msg," SEQ %s (%d hits rev); ",pseq->name,ploc->score);
        }

        add_to_projmsg(ploc->ctg, msg);

       // printf("loc: seq %s (%d,%d) -> ctg %d (%d,%d) score:%d corr:%lf\n",pseq->name,ploc->seq_start,ploc->seq_end,
       //         ploc->ctg,ploc->ctg_start,ploc->ctg_end,ploc->score,ploc->corr);fflush(0);       
    }

}

void redo_sequence_placements()
{
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ *pseq;
    int num_placed = 0;
    int i;

    if(graphActivate(gsequence))
    {
        graphDestroy();
    }
    if(ctg_window!=NULL) gtk_widget_destroy(ctg_window);

    if (!seq_tree.child)
    {
        return;
    }

    if(ctg_window!=NULL) gtk_widget_destroy(ctg_window);

    scan_seq_props();

    for (i=0; i<=max_contig; i++)
    { 
        contigs[i].draft=0;
        contigs[i].projmsg[0] = 0;
    }

    fpc_stack_init(&tempstack);  
    pnode = &seq_tree;
    printf("Placing sequences....\n");fflush(0);
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;
        place_sequence(pseq); 
        if (pseq->ctgpos) num_placed++;   
    }
    printf("\n%d placed.\n",num_placed);fflush(0);
    fpc_stack_destroy(&tempstack);
}

void redo_sequence_placements_btn()
{
    redo_sequence_placements();
    Zproj_results(0); 
}

static int aligndata_ctgsort(const void* _p1, const void* _p2)
{
	AlignData* p1 = (AlignData*)_p1;
	AlignData* p2 = (AlignData*)_p2;

	int ctg1 = (p1->ctg_start + p1->ctg_end)/2;
	int ctg2 = (p2->ctg_start + p2->ctg_end)/2;

	if (p1->fpc_ctg > p2->fpc_ctg) return 1;
	else if (p1->fpc_ctg < p2->fpc_ctg) return -1;
	else 
	{
		if (ctg1 > ctg2) return 1;
		else if (ctg1 < ctg2) return -1;
	}
	return 0;
}
static int aligndata_seqsort(const void* _p1, const void* _p2)
{
	AlignData* p1 = (AlignData*)_p1;
	AlignData* p2 = (AlignData*)_p2;

	if (!strcmp(p1->seq_name,p2->seq_name))
	{
		if (p1->seq_start > p2->seq_start) return 1;
		else if (p1->seq_start < p2->seq_start) return -1;
		
	}
	return lexstrcmp_fpc(p1->seq_name,p2->seq_name);
}

void seq_stats()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ *pseq;
    struct seqctgpos* ppos;
    int ctg;
    int spans_ctg;
    int spans_seq;
    long total_seqlen = 0;
    long total_ctglen = 0;
    long total_aligned_seqlen = 0;
    long total_aligned_ctglen = 0;
    int total_aligned_ctgs = 0;
    int total_ctgs = 0;
    int *ctg_aligned;
    int pct = 0;
    float lpct = 0;
	int ctgsort = 0;
	int naligns = 0;
	AlignData* adata;
	int i;
	
    strcpy(newdirname,".");
    strcpy(newfilename,"seq2fpc.txt");
    end[0] = 0;

    ctg_aligned = (int*)malloc(sizeof(int)*(max_contig+1));
    memset(ctg_aligned,0,sizeof(int)*(max_contig+1));

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);


	ctgsort = messQuery("Sort results by FPC contig (choose no to sort by draft sequence)?");

	if (ctgsort)
	{
		fprintf(f,"%-10s%-18s%-10s%-10s%-10s%-10s%-10s%-10s%-10s\n","FPC ctg","Sequence",
                    "ctg_start","ctg_end","seq_start","seq_end","orient","spans ctg","spans seq");
	}
	else
	{
		fprintf(f,"%-18s%-10s%-10s%-10s%-10s%-10s%-10s%-10s%-10s\n","Sequence",
                    "seq_start","seq_end","FPC ctg","ctg_start","ctg_end","orient","spans ctg","spans seq");
	}

	/* go through once to count, then again to fill */

    fpc_stack_init(&tempstack);  
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;
        for (ppos = pseq->ctgpos; ppos; ppos = ppos->next)
        {
			naligns++;
        }        
    }
    fpc_stack_destroy(&tempstack);

	if (naligns == 0)
	{
		printf("No alignments!\n");fflush(0);return;
	}	

	adata = (AlignData*)malloc(naligns*sizeof(AlignData));

    fpc_stack_init(&tempstack);  
    pnode = &seq_tree;
	i = 0;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;
        total_seqlen += pseq->length;        
        for (ppos = pseq->ctgpos; ppos; ppos = ppos->next)
        {
            ctg = ppos->ctg;
            ctg_aligned[ctg] = 1;
            spans_ctg = 'N';
            if (ppos->ctg_start < contigs[ctg].left + Pz.fromendInt &&
                ppos->ctg_end > contigs[ctg].right - Pz.fromendInt)
            {
                spans_ctg = 'Y';
            }
			spans_seq = 'N';
            if (ppos->seq_start < 1000*Pz.seq_fromendInt &&
                ppos->seq_end > pseq->length - 1000*Pz.seq_fromendInt)
            {
                spans_seq = 'Y';
            }
			

            total_aligned_ctglen += (ppos->ctg_end - ppos->ctg_start);
            total_aligned_seqlen += (ppos->seq_end - ppos->seq_start);

			adata[i].fpc_ctg = ctg;
			strncpy(adata[i].seq_name,pseq->name,99);
			adata[i].seq_start = ppos->seq_start;
			adata[i].seq_end = ppos->seq_end;
			adata[i].ctg_start = ppos->ctg_start;
			adata[i].ctg_end = ppos->ctg_end;
			adata[i].orient = (ppos->corr >= 0 ? 1 : -1);
			adata[i].spans_ctg = spans_ctg;
			adata[i].spans_seq = spans_seq;

			i++;
        }        
    }
    fpc_stack_destroy(&tempstack);  

	if (ctgsort)
	{
		qsort(adata,naligns,sizeof(AlignData),aligndata_ctgsort);
	}      
	else
	{
		qsort(adata,naligns,sizeof(AlignData),aligndata_seqsort);
	}


	for (i = 0; i < naligns; i++)
	{
		if (ctgsort)
		{
    		fprintf(f,"%-10d%-18s%-10d%-10d%-10d%-10d%-10d%-10c%-10c\n",adata[i].fpc_ctg,adata[i].seq_name,
			      adata[i].ctg_start,adata[i].ctg_end,adata[i].seq_start,adata[i].seq_end,adata[i].orient, 
				adata[i].spans_ctg,adata[i].spans_seq);
		}
		else
		{
    		fprintf(f,"%-18s%-10d%-10d%-10d%-10d%-10d%-10d%-10c%-10c\n",adata[i].seq_name,
			      adata[i].seq_start,adata[i].seq_end,adata[i].fpc_ctg,adata[i].ctg_start,adata[i].ctg_end,adata[i].orient, 
				adata[i].spans_ctg,adata[i].spans_seq);

		}
		
	}        
    printf("\n");fflush(0);

    for (ctg = 1; ctg <= max_contig; ctg++)
    {
        if (contigs[ctg].count > 0)
        {
            total_ctgs++;
            if (ctg_aligned[ctg] == 1)
            {
                total_aligned_ctgs++;
            }
            total_ctglen += (contigs[ctg].right - contigs[ctg].left);
        }
    }
    pct = (int)(100*total_aligned_ctgs/total_ctgs);
    fprintf(f,"%d contigs aligned (%d%% of %d)\n",total_aligned_ctgs,pct,total_ctgs);fflush(0);

    pct = (int)(100*total_aligned_ctglen/total_ctglen);
    fprintf(f,"%ld cb units aligned (%d%% of %ld)\n",total_aligned_ctglen,pct,total_ctglen);fflush(0);

    lpct = ((float)100)*((float)total_aligned_seqlen)/((float)total_seqlen);
    fprintf(f,"%ld seq length aligned (%lf%% of %ld)\n",total_aligned_seqlen,lpct,total_seqlen);fflush(0);

    fclose(f);   
    free(ctg_aligned);
}

int is_whitespace(char* str)
{
    int i;
    int whitespace = 1;

    for (i = 0; i < strlen(str); i++)
    {
        if (!isspace(str[i]))
        {
            whitespace = 0;     
            break;
        }    
    }
    if (whitespace)
    {
        return 1;
    }
    return 0;
}

int glob_match(char* p, char* query)
{
    int match = 0;
    int i,j;
    char *copy, *s;

    if (*query == 0 || is_whitespace(query))
    {
        return 1;
    }
    copy = strdup(query);
    s = copy;

    if (s[0] == '*')
    {
        s++;
        if (strlen(s) > 0 && s[strlen(s) - 1] == '*')
        {
            //  *xxx*   
            s[strlen(s) - 1] = 0;   
            if (strstr(p,s))
            {
                match = 1;
            }
        }
        else
        {
            // *xxx
            // must doublecheck strstr as we might have *AB matching to ABAB
            if (strstr(p,s))
            {
                match = 1;
                for(i = strlen(s),j = strlen(p); i >= 0 && j >= 0; i--, j--)
                {
                    if (s[i] != p[j])
                    {
                        match = 0;
                        break;
                    }
                }
            }
        }
    }
    else
    {
        if (0 == strcmp(p,s))
        {
            match = 1;
        }
    }
    free(copy);
    return match;
}

void scan_seq_props()
{
    if (graphActivate(gSEQ))
    {
        sscanf(min_clones_sz,"%d",&SeqProps.min_clones);  
        sscanf(min_ctgs_sz,"%d",&SeqProps.min_ctgs); 
        sscanf(window_size_sz,"%d",&SeqProps.window_size);
        strcpy(SeqProps.search_name,"");
        if (!is_whitespace(search_name_sz))
        {
            strncpy(SeqProps.search_name, search_name_sz, 30);
        }
    }
}

void seqsearch_name(char* _pc, struct list* oldroot)
{
    struct list *newentry, *lastentry,*p;
    char* name;
    char* pc;

    pc = strdup(_pc);
    assert(oldroot);

    strncpy(SeqProps.search_name,pc,30);



    lowerstring(pc);
    p = oldroot;
    lastentry = 0;
    while (p) 
    {     
        name = strdup(p->seq->name);
        lowerstring(name);            
        if (glob_match(name,pc))
        {
            newentry = (struct list*)messalloc(sizeof(struct list));
            memset(newentry,0,sizeof(struct list));
            newentry->seq = p->seq;
            if (lastentry)
            {
                lastentry->next = newentry;
            }
            else
            {
                listroot = newentry;
            }
            lastentry = newentry;
        }
        free(name);
        p = p->next;
    }

    if (listroot)
    {
        return;        
    }
    if (!strstr(pc,"*"))
    {
        printf("Exact match (Name=%s) not found.  Looking for substring.\n",pc);fflush(0);

        lastentry = 0; 
        p = oldroot;
        while (p) 
        {     
            name = strdup(p->seq->name);
            lowerstring(name);            
            if (strstr(name,pc))
            {
                newentry = (struct list*)messalloc(sizeof(struct list));
                memset(newentry,0,sizeof(struct list));
                newentry->seq = p->seq;
                if (lastentry)
                {
                    lastentry->next = newentry;
                }
                else
                {
                    listroot = newentry;
                }
                lastentry = newentry;
            }
            free(name);
            p = p->next;
        }
    }
}

void free_keyset_list(struct list** list)
{
    struct list *p, *p1;
    p = *list;    
    while(p)
    {
        p1 = p;
        p=p->next;
        messfree(p1);
    }  
    *list = 0; 
}

void clear_listroot()
{
    if(graphActivate(g2))
    {    
        graphDestroy();
    }
    free_keyset_list(&listroot);
}

void dup_keyset_list(struct list** out, struct list* in)
{
    struct list* p, *newentry, *lastentry;

    lastentry = 0;
    for (p = in; p; p = p->next)
    {
        newentry = (struct list*)messalloc(sizeof(struct list));
        memset(newentry,0,sizeof(struct list));
        newentry->seq = p->seq;
        if (lastentry)
        {
            lastentry->next = newentry;
        }
        else
        {
            *out = newentry;
        }
        lastentry = newentry;        
    }   
}
int intervals_contained(int s1,int e1,int s2,int e2)
{
    if ( (s1 < s2 && e1 > e2) || (s2 < s1 && e2 > e1) )
    {
        return 1;
    }
    return 0;
}
int interval_gap(int s1,int e1,int s2,int e2)
{
    if (s2 > e1)
    {
        return (s2 - e1);
    }
    else if (s1 > e2)
    {
        return s1 - e2;
    }
    return 0;
}


void scan_fields()
{
    sscanf(ctg_fromend_sz,"%d",&Pz.fromendInt);   
    sscanf(seq_fromend_sz,"%d",&Pz.seq_fromendInt);   
    sscanf(ctg_minspan_sz,"%d",&Pz.minspanInt);   
    sscanf(seq_minspan_sz,"%d",&Pz.seq_minspanInt);   
    sscanf(unaligned_minclones_sz,"%d",&unaligned_minclones);   
    sscanf(unaligned_minseqlen_sz,"%d",&unaligned_minseqlen);   
}

// We indicate a join if two sequence alignments overlap or have only a small
// gap between them. Gapped alignments are notated by "G", end-overlaps
// by "E", and overlaps in interior regions (which really indicate 
// miassemblies) by "I"
void seq_joins()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ *pseq;
    int ctg;
    GArray** ctg2seq;
    int j,k;
    struct seqctgpos* ploc,*ploc1,*ploc2;
    int e1, e2, cspan, sspan;
    int seq_join = 0;
    int end_join = 0;
    char msg[1000];
    int olap;
    char rl1,rl2;

    for (ctg=0; ctg<=max_contig; ctg++)
    { 
        contigs[ctg].projmsg[0] = 0;
    }

    scan_fields();
   
    ctg2seq = (GArray**)malloc((max_contig+1)*sizeof(GArray*));
    memset(ctg2seq,0,(max_contig+1)*sizeof(GArray*));

    strcpy(newdirname,".");
    strcpy(newfilename,"seq_joins.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);

    // first we have to collect the superctgs aligning to the contig
    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            ctg = ploc->ctg;
            if (contigs[ctg].ctgstat == DEAD || contigs[ctg].ctgstat == AVOID)
            {
                continue;
            }
            e1 = ploc->seq_start;
            e2 = pseq->length - ploc->seq_end;
            cspan = ploc->ctg_end - ploc->ctg_start;
            sspan = ploc->seq_end - ploc->seq_start;
            if (0) //cspan < Pz.minspanInt || sspan < Pz.seq_minspanInt*1000 )
            {
                continue;
            }

            if (1) //e1 <  Pz.seq_fromendInt*1000  ||  e2 < Pz.seq_fromendInt*1000)
            {
                if (!ctg2seq[ctg])
                {
                    ctg2seq[ctg] = g_array_new(FALSE, FALSE, sizeof(struct seqctgpos*));
                }
                g_array_append_val(ctg2seq[ctg],ploc);
            } 
        }
    }

    // now we have to go through all the pairs and print the
    // ones that don't conflict and don't have too big a gap

    for (ctg = 1; ctg <= max_contig; ctg++)
    {

        if (ctg2seq[ctg] && ctg2seq[ctg]->len >= 2)
        {
            for (j = 0; j < ctg2seq[ctg]->len; j++)
            {
                ploc1 = g_array_index(ctg2seq[ctg],struct seqctgpos*,j);
                for (k = j + 1; k < ctg2seq[ctg]->len; k++)
                {
                    ploc2 = g_array_index(ctg2seq[ctg],struct seqctgpos*,k);
                    
                    if (ploc1->pseq == ploc2->pseq) continue;
                    if (  
                            (ploc1->ctg_start <= ploc2->ctg_start && placements_overlap(ploc1,ploc2,&rl1,&rl2,&olap)) ||
                            (ploc2->ctg_start <= ploc1->ctg_start && placements_overlap(ploc2,ploc1,&rl2,&rl1,&olap)) 
                        )
                    {
                        seq_join++;

                        sprintf(msg,"SEQJOIN %s %s %d; ",ploc1->pseq->name,ploc2->pseq->name,olap);
                        add_to_projmsg(ctg, msg);

                        fprintf(f,"Ctg%d\t%s\t%s\t%d\n",ctg,ploc1->pseq->name,ploc2->pseq->name,olap);
                        if (rl1 != ' ' && rl2 != ' ') end_join++;
                    }

                }
            }
        }
    }             

    fpc_stack_destroy(&tempstack); 
    
    for (ctg = 0; ctg <= max_contig; ctg++)
    {
        if (ctg2seq[ctg])
        {
            g_array_free(ctg2seq[ctg],TRUE);
        }
    }
    printf("done.\n");fflush(0);
    printf("%d potential sequence merges found (%d on ends)\n",seq_join,end_join);fflush(0);
    free(ctg2seq);
    fclose(f);

    Zproj_results(0);
}

// go through the seqs, then through their aligned contigs, looking
// for merge pairs
void ctg_joins()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ *pseq;
    int ctg1,ctg2;
    struct seqctgpos*ploc1,*ploc2;
    int e1, e2, cspan, sspan;
    int njoin = 0;
    scan_fields();
    char msg[1000];
    int ctg;
	int gap;

    for (ctg=0; ctg<=max_contig; ctg++)
    { 
        contigs[ctg].projmsg[0] = 0;
    }

    strcpy(newdirname,".");
    strcpy(newfilename,"ctg_joins.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);

    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (ploc1 = pseq->ctgpos; ploc1; ploc1 = ploc1->next)
        {
            ctg1 = ploc1->ctg;
            if (contigs[ctg1].ctgstat == DEAD || contigs[ctg1].ctgstat == AVOID)
            {
                continue;
            }
            e1 = ploc1->ctg_start;
            e2 = contigs[ctg1].right - ploc1->ctg_end;
            cspan = ploc1->ctg_end - ploc1->ctg_start;
            sspan = ploc1->seq_end - ploc1->seq_start;
            if (0) //cspan < Pz.minspanInt || sspan < Pz.seq_minspanInt )
            {
                continue;
            }

            if (e1 <  Pz.fromendInt  ||  e2 < Pz.fromendInt)
            {
                // this contig has an endpoint on the sequence. Look for a paired contig. 

                for (ploc2 = ploc1->next; ploc2; ploc2 = ploc2->next)
                {
                    ctg2 = ploc2->ctg;
                    if (contigs[ctg2].ctgstat == DEAD || contigs[ctg2].ctgstat == AVOID)
                    {
                        continue;
                    }
                    e1 = ploc2->ctg_start;
                    e2 = contigs[ctg2].right - ploc2->ctg_end;
                    cspan = ploc2->ctg_end - ploc2->ctg_start;
                    sspan = ploc2->seq_end - ploc2->seq_start;
                    if (0) //cspan < Pz.minspanInt || sspan < Pz.seq_minspanInt )
                    {
                        continue;
                    }

                    if (e1 <  Pz.fromendInt  ||  e2 < Pz.fromendInt)
                    {
                        // this contig has an endpoint on the sequence.
                        // see if it's close enough to the previous.

						gap = interval_gap(ploc1->seq_start,ploc1->seq_end,ploc2->seq_start,ploc2->seq_end);
                        if (gap <= Pz.seq_fromendInt*1000)
                        {
                            njoin++;
                            fprintf(f,"%s\tCtg%d\tCtg%d\tGap %d\n",pseq->name,ctg1,ctg2,gap);

                            sprintf(msg," CTGJOIN %d (SEQ %s); ",ctg1,pseq->name);
                            add_to_projmsg(ctg2, msg);

                            sprintf(msg," CTGJOIN %d (SEQ %s); ",ctg2,pseq->name);
                            add_to_projmsg(ctg1, msg);
 
                        }
                    } 
                }


            } 
        }
    }
    fclose(f);
    printf("done.\n");fflush(0);
    printf("%d potential FPC contig merges found\n",njoin);fflush(0);

    Zproj_results(0);
}


// Misassemblies are indicated by alignments terminating in the center of
//  both sequence and contig
void seq_mis()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ *pseq;
    int ctg;
    struct seqctgpos* ploc;
    int ctgend_left, ctgend_right;
    int seqend_left, seqend_right;
    int mis_left = 0;
    int mis_right = 0;
    int* counts;
    int nmis = 0;
    int nmulti = 0;
    char LR[3];
    char detail[100];
    char* printed;
    char msg[1000];


    for (ctg=0; ctg<=max_contig; ctg++)
    { 
        contigs[ctg].projmsg[0] = 0;
    }

    scan_fields();

    strcpy(newdirname,".");
    strcpy(newfilename,"misassembly.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);

    // first we have to collect the superctgs with an endpoint in a contig
    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;

    fprintf(f,"Potential Misassemblies\n\n");      
    fprintf(f,"MULTI = multiple alignment of sequence to contig\n");      
    fprintf(f,"L = inconsistent alignment termination, left end\n");      
    fprintf(f,"R = inconsistent alignment termination, right end\n");  
    fprintf(f,"\n");    
    fprintf(f,"%-10s%-15s%-10s%-15s%-15s\n","Ctg","Seq","Problem","Seq span (bp)","Ctg span (cb)");   

    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;


        counts = (int*)malloc((max_contig+1)*sizeof(int));
        memset(counts,0,(max_contig+1)*sizeof(int));        
        printed = (char*)malloc((max_contig+1)*sizeof(char));
        memset(printed,0,(max_contig+1)*sizeof(char));        

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            ctg = ploc->ctg;
            if (contigs[ctg].ctgstat == DEAD || contigs[ctg].ctgstat == AVOID)
            {
                continue;
            }
            counts[ctg]++;
        }
        
        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            ctg = ploc->ctg;
            if (contigs[ctg].ctgstat == DEAD || contigs[ctg].ctgstat == AVOID)
            {
                continue;
            }
            if (counts[ctg] > 1) 
            {
                if (printed[ctg] == 0)
                {
                    nmulti++;
                    fprintf(f,"%-10d%-15s%-10s\n",ploc->ctg,ploc->pseq->name,"MULTI");
                    sprintf(msg," MULTI %s; ",pseq->name);
                    add_to_projmsg(ploc->ctg, msg);   
                    printed[ctg] = 1;
                }
                continue;    
            }

            // check the endpoint pairs

            seqend_left = 0;
            seqend_right = 0;
            ctgend_left = 0;
            ctgend_right = 0;
            mis_left = 0;
            mis_right = 0;

            if (ploc->seq_start < Pz.seq_fromendInt*1000) seqend_left = 1;
            if (pseq->length - ploc->seq_end < Pz.seq_fromendInt*1000) seqend_right = 1;

            if (ploc->ctg_start < Pz.fromendInt) ctgend_left = 1;
            if (contigs[ploc->ctg].right - ploc->ctg_end < Pz.fromendInt) ctgend_right = 1;

            if (ploc->corr > 0)
            {
                if (ctgend_left == 0 && seqend_left == 0) mis_left = 1;      
                if (ctgend_right == 0 && seqend_right == 0) mis_right = 1;      
            }
            else
            {
                if (ctgend_left == 0 && seqend_right == 0) mis_left = 1;      
                if (ctgend_right == 0 && seqend_left == 0) mis_right = 1;      

            }

            if (mis_left == 1 || mis_right == 1)
            {
                LR[0] = 0;
                if (mis_left) strcat(LR,"L");
                if (mis_right) strcat(LR,"R");
                nmis++;
                detail[0] = 0;
                if (ctgend_right == 0 && contigs[ploc->ctg].right - ploc->ctg_end < 2*Pz.fromendInt)
                {
                    strcat(detail,"Extra clones, contig right end;");
                }
                if (ctgend_left == 0 && ploc->ctg_start < 2*Pz.fromendInt)
                {
                    strcat(detail,"Extra clones, contig left end;");
                }
                if (seqend_right == 0 && pseq->length - ploc->seq_end < 2*Pz.seq_fromendInt*1000)
                {
                    strcat(detail,"Extra sequence, seq right end;");
                }
                if (seqend_left == 0 && ploc->seq_start < 2*Pz.seq_fromendInt*1000)
                {
                    strcat(detail,"Extra sequence, seq left end;");
                }

                sprintf(msg," BADTERM %s",pseq->name);
                add_to_projmsg(ploc->ctg, msg);

                fprintf(f,"%-10d%-15s%-10s%-15d%-15d\n", ploc->ctg,ploc->pseq->name,LR,ploc->seq_end-ploc->seq_start,ploc->ctg_end-ploc->ctg_start);   
            }
        }
        free(printed);
        printed=0;
        free(counts);
        counts = 0;
    }
    fclose(f);
    printf("done.\n");fflush(0);
    printf("%d potential misassemblies (%d within one contig)\n",nmis+nmulti,nmulti);fflush(0);

    Zproj_results(0); 

}


// find contigs that have a seq ending in the middle
void ctg_partial_search()
{
    int i;
    struct list *newentry, *lastentry;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    struct seqctgpos* ploc;
    FPC_SEQ* pseq;
    int *ctgs;
    int ctg;

    scan_seq_props();

    /* we're going to ignore any existing keyset */

    clear_listroot();
    lastentry = 0;

    classctg = CTGCLASS;

    ctgs = (int*)malloc((max_contig+1)*sizeof(int));
    memset(ctgs,0,(max_contig+1)*sizeof(int));

    /* walk the seq placements and find ones with end/start not within the ctg end limit */

    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            ctg = ploc->ctg;
            if (ploc->ctg_start > contigs[ctg].left + Pz.fromendInt  ||  
                ploc->ctg_end < contigs[ctg].right - Pz.fromendInt)
            {
                ctgs[ctg]++;
            } 
        }
    }
    fpc_stack_destroy(&tempstack); 

  
    for (i=1; i <= max_contig; i++)
    {
        if (ctgs[i] > 0)
        {
            newentry = (struct list*)messalloc(sizeof(struct list));
            memset(newentry,0,sizeof(struct list));
            newentry->index = i;
            if (lastentry)
            {
                lastentry->next = newentry;
            }
            else
            {
                listroot = newentry;
            }
            lastentry = newentry;
        }        
    }

    free(ctgs);

    displaykeyset(1);
      
}
// find contigs that have multiple placed seqs
void ctg_multi_search()
{
    int i;
    struct list *newentry, *lastentry;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ* pseq;
    struct seqctgpos* ploc;
    int* counts;
    
    scan_seq_props();

    /* we're going to ignore any existing keyset */

    clear_listroot();
    lastentry = 0;

    classctg = CTGCLASS;

    counts = (int*)malloc((max_contig+1)*sizeof(int));
    memset(counts,0,(max_contig+1)*sizeof(int));

    /* walk the seq placements and generate contig counts */

    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            counts[ploc->ctg]++;
        }
    }
    fpc_stack_destroy(&tempstack); 
  
    for (i=1; i <= max_contig; i++)
    {
        if (counts[i] >= 2)
        {
            newentry = (struct list*)messalloc(sizeof(struct list));
            memset(newentry,0,sizeof(struct list));
            newentry->index = i;
            if (lastentry)
            {
                lastentry->next = newentry;
            }
            else
            {
                listroot = newentry;
            }
            lastentry = newentry;
        }        
    }

    free(counts);

    displaykeyset(1);
}
void seq_search()
{
    int i;
    struct list *p, *p1, *oldroot, *firstlevel, *newentry, *lastentry;
    char str1[80];
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ* pseq;
    struct seqctgpos* ploc;
    int at_end;
    
    scan_seq_props();

    oldroot = 0;

    /* Save off the existing list if it's already a seq class list */
    if (classctg == SEQCLASS)
    {
        dup_keyset_list(&oldroot,listroot);
    }
    clear_listroot();

    main_classctg = classctg;
    classctg = SEQCLASS;

    /* Initialize the search with either the old list or all sequences */
    firstlevel = 0;
    lastentry = 0;
    if (oldroot == 0)
    {        
        fpc_stack_init(&tempstack); 
        pnode = &seq_tree;
        while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
        {     
            if (!pnode->pdata) continue;
            pseq = (FPC_SEQ*)pnode->pdata;
            if (pseq->parent) continue;

            newentry = (struct list*)messalloc(sizeof(struct list));
            memset(newentry,0,sizeof(struct list));
            newentry->seq = (FPC_SEQ*)pnode->pdata;
            if (lastentry)
            {
                lastentry->next = newentry;
            }
            else
            {
                firstlevel = newentry;
            }
            lastentry = newentry;
        }
        fpc_stack_destroy(&tempstack); 
    }
    else
    {
        dup_keyset_list(&firstlevel,oldroot);
    }
 
    /* Now execute the current search on this set */  
    listroot = 0; 
    lastentry = 0;
    for (p = firstlevel; p; p1 = p, p = p->next)
    {     
        pseq = p->seq;
        if (pseq->parent) continue;
        if (SeqProps.search_placed && !pseq->ctgpos) continue;

        if (SeqProps.search_multiplaced && 
            (!pseq->ctgpos || !pseq->ctgpos->next)) continue;

        if (SeqProps.search_ends)
        {
            at_end = 0;
            for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
            {
                if (ploc->LR != 0)
                {
                    at_end++;
                }
            }
            if (at_end == 0) 
            {
                continue;
            }  
            if (SeqProps.search_multiplaced && at_end == 1)
            {
                continue;
            } 
        }

        newentry = (struct list*)messalloc(sizeof(struct list));
        memset(newentry,0,sizeof(struct list));
        newentry->seq = pseq;
        if (lastentry)
        {
            lastentry->next = newentry;
        }
        else
        {
            listroot = newentry;
        }
        lastentry = newentry;
    }

    free_keyset_list(&firstlevel);

    if (listroot && !is_whitespace(SeqProps.search_name))
    {
        firstlevel = listroot;
        listroot = 0;
        seqsearch_name(SeqProps.search_name, firstlevel);
    }


    if(listroot == 0)
    {
        if (oldroot)
        {
            /* We were searching an existing keyset and found no hits */
            listroot = oldroot;
            i=0;
            while(oldroot != NULL)
            {
                i++;
                oldroot = oldroot->next;
            }
            sprintf(str1,"Search failed for keyset of %d items.",i);
            displaymess(str1);
            displaykeyset(0);
        }
        else
        {         
            displaymess("Search failed");  
        }
    }
    else
    {
        p = oldroot;
        while(p)
        {
            p1 = p;
            p=p->next;
            messfree(p1);
        }
        page = 0;
        displaykeyset(1);
    }
    graphPop();
}

int ctgs_bysize_sort(const void* pi1, const void* pi2)
{
    int ctg1 = *(int*)pi1;
    int ctg2 = *(int*)pi2;
    int s1 = contigs[ctg1].count;
    int s2 = contigs[ctg2].count;
    if (s1 > s2) return -1;
    else if (s1 < s2) return +1;
    return 0;
}
int get_bes_seq_hit_ctg(SEQHIT* phit)
{
    FPC_SEQ* pseq;
    CLONE* clp;
    struct seqctgpos* ploc;
    int spos, cpos;

    pseq = phit->seq;
    clp = arrp(acedata,phit->clone_idx,CLONE);
    spos = (phit->seq_start + phit->seq_end)/2;
    cpos = (clp->x + clp->y)/2;

    for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
    {
        if (spos >= ploc->seq_start && spos <= ploc->seq_end &&
                cpos >= ploc->ctg_start && cpos <= ploc->ctg_end)
        {
            return ploc->ctg;
        }   
    }

    return 0;
}
// Find BES which should have been in a draft contig but aren't.
// This relies on the BES being loaded separately as sequences.
void missing_bes_search()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;

    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_TREE temptree, temptree2;
    FPC_SEQ* pseq, *pseq2;
    struct seqctgpos* ploc;
    struct fpc_seq_list *pctg;
    int ctg,ctg2;
    SEQHIT* phit;
    CLONE* clp;
    char tmpname[100];
    int mid;
    int cidx;
    char tmpname2[100];
    char ctgname[100];
    int* ctgcounts;

    scan_fields();

    strcpy(newdirname,".");
    strcpy(newfilename,"unincorporated_bes.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);

    for (ctg=0; ctg<=max_contig; ctg++)
    { 
        contigs[ctg].projmsg[0] = 0;
    }
    ctgcounts = (int*)malloc((max_contig+1)*sizeof(int));
    memset(ctgcounts,0,(max_contig+1)*sizeof(int));

    // Unfortunately, we have to build an auxiliary tree of all the BES
    // telling us which sequence they are part of. This is necessary 
    // because the BES are loaded only as hits rather than sequences
    // in their own right. 

    init_fpc_tree(&temptree2);
    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (phit = pseq->hits; phit; phit = phit->next)
        {
            if (phit->type != HT_BES) continue;                
            clp = arrp(acedata,phit->clone_idx,CLONE);
            sprintf(tmpname,"%s.%c",clp->clone,phit->rf);
            fpc_tree_insert_node(&temptree2,(void*)phit,tmpname);        
        }
    }
    fpc_stack_destroy(&tempstack);
    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            ctg = ploc->ctg;
            fprintf(f,">Ctg%d %s\n",ctg,pseq->name);fflush(0);

            // First we have to collect the BES which are part of this alignment, into
            // a temporary tree so we can search them quickly. 

            init_fpc_tree(&temptree);

            for (phit = pseq->hits; phit; phit = phit->next)
            {
                if (phit->type != HT_BES) continue;                
                clp = arrp(acedata,phit->clone_idx,CLONE);
                if  (clp->ctg != ctg) continue;
                mid = (clp->x + clp->y)/2;
                if (mid >= ploc->ctg_start && mid <= ploc->ctg_end)
                {
                    sprintf(tmpname,"%s.%c",clp->clone,phit->rf);
                    fpc_tree_insert_node(&temptree,(void*)1,tmpname);
                }
            }
            // And we have to go through the seq ctgs as well...as usual...

            for (pctg = pseq->ctgs; pctg; pctg = pctg->next)
            {
                pseq2 = pctg->seq;
                for (phit = pseq2->hits; phit; phit = phit->next)
                {
                    if (phit->type != HT_BES) continue;                
                    clp = arrp(acedata,phit->clone_idx,CLONE);
                    if  (clp->ctg != ctg) continue;
                    mid = (clp->x + clp->y)/2;
                    if (mid >= ploc->ctg_start && mid <= ploc->ctg_end)
                    {
                        sprintf(tmpname,"%s.%c",clp->clone,phit->rf);
                        fpc_tree_insert_node(&temptree,(void*)1,tmpname);
                    }
                }
            }

            // Now go through all the BES which lie in the contig region and see
            // if they were used as hits.

            for (cidx = contigs[ctg].start; cidx > 0; cidx = clp->next)
            {
                clp = arrp(acedata,cidx,CLONE);
                mid = (clp->x + clp->y)/2;
                if (mid < ploc->ctg_start || mid > ploc->ctg_end) continue;
                sprintf(tmpname,"%s.r",clp->clone);
                strcpy(tmpname2,"");
                ctg2 = 0;
                strcpy(ctgname,"");
                if ( fpc_tree_find_data(&seq_tree,tmpname))
                {
                    // this BES exists
                    if (!fpc_tree_find_data(&temptree,tmpname))
                    {
                        // this BES does not hit in this alignment region
						/*
                        if ( (phit = fpc_tree_find_data(&temptree2,tmpname)) )
                        {
                            // the BES is in a sequence alignming someplace else
                            pseq2 = phit->seq;
                            strcpy(tmpname2,pseq2->name);
                            ctg2 = get_bes_seq_hit_ctg(phit);
                            if (ctg2 > 0) sprintf(ctgname,"Ctg%d",ctg2);
                            sprintf(tmprem,"misplaced BES:%s %s %s",tmpname,pseq2->name,ctgname);
                            addfpremark(tmprem,cidx);
                        }
                        else
                        {
                            sprintf(tmprem,"misplaced BES:%s",tmpname);
                            addfpremark(tmprem,cidx);
                        }
						*/
                        fprintf(f,"%-15s%-15s%s\n",tmpname,tmpname2,ctgname);fflush(0);
                        ctgcounts[ctg]++;
                    }   
                }   
                sprintf(tmpname,"%s.f",clp->clone);
                strcpy(tmpname2,"");
                ctg2=0;
                strcpy(ctgname,"");
                if ( fpc_tree_find_data(&seq_tree,tmpname) )
                {
                    if (!fpc_tree_find_data(&temptree,tmpname))
                    {
                        if ( (phit = fpc_tree_find_data(&temptree2,tmpname)) )
                        {
                            pseq2 = phit->seq;
                            strcpy(tmpname2,pseq2->name);
                            ctg2 = get_bes_seq_hit_ctg(phit);
                            strcpy(ctgname,"");
                            if (ctg2 > 0) sprintf(ctgname,"Ctg%d",ctg2);
                        }
                         fprintf(f,"%-15s%-15s%s\n",tmpname,tmpname2,ctgname);fflush(0);
                        ctgcounts[ctg]++;
                    }   
                }   
            }

            clear_fpc_tree(&temptree,0);
        }
    }
    fpc_stack_destroy(&tempstack);
    fclose(f);
    clear_fpc_tree(&temptree2,0);

    for (ctg = 1; ctg <= max_contig; ctg++)
    {
        if (ctgcounts[ctg] > 0)
        {
            sprintf(tmpname," %d unused BES ",  ctgcounts[ctg]);
            add_to_projmsg(ctg,tmpname);
        }
    }
    printf("\n");fflush(0);
    free(ctgcounts);
    Zproj_results(0);

}
void clear_fprem_pattern(char* pat)
{
  struct remark *fp,*prev;
  struct list *q;
  CLONE *clone;
  int cnt=0;
  char tmp[300];

  for (q = listroot; q!=NULL; q=q->next) 
  {
     clone = arrp(acedata,q->index,CLONE);
     prev = NULL;
     for (fp = clone->fp_remark; fp != NULL; fp = fp->next)
     {
        strcpy(tmp, fp->message);
        lowerstring(tmp);
        if(strstr(tmp,pat)!=NULL)
        {
            cnt++;
	    	if(fp->next == NULL)
			{ 
               if (prev == NULL) clone->fp_remark = NULL;
               else prev->next = NULL;
	    	}
            else if (prev == NULL) 
			{ /* first */
               clone->fp_remark = fp->next;
            }
	    	else
			{
	       		prev->next = fp->next;
	    	}	
        }
        else prev = fp;
     }    
  }
  printf("Remove fp_remark from %d clones.\n",cnt);
  graphDestroy();
}
// Flag BES which are in a draft contig aligning to an fpc contig,
// but whose clone is located someplace else in the fpc map
void misplaced_bes_search()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;

    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ* pseq, *pseq2;
    struct seqctgpos* ploc;
    struct fpc_seq_list *pctg;
    int ctg;
    SEQHIT* phit;
    CLONE* clp;
	int errors = 0;
	char tmprem[1000];

    scan_fields();

    strcpy(newdirname,".");
    strcpy(newfilename,"misplaced_bes.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);

	/* Go through each seq placement and all the incorporated BES which
		lie within it, according to their sequence coords */

    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
	
	clear_fprem_pattern("MISPLACED BES");

    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;
		if (!pseq->ctgpos) continue; // no alignments = nothing to check
        fprintf(f,"#### SEQ %s\n",pseq->name);fflush(0);

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            ctg = ploc->ctg;
            fprintf(f,"## Aligment to Ctg%d\n",ctg);fflush(0);

			/* Go through all the BES in this seqctg and its possible sub-ctgs, 
					and see if they are in the right place */
            for (phit = pseq->hits; phit; phit = phit->next)
            {
                if (phit->type != HT_BES) continue;  
				if (!intervals_overlap(phit->seq_start,phit->seq_end,ploc->seq_start,ploc->seq_end))
				{
					continue;
				}              
                clp = arrp(acedata,phit->clone_idx,CLONE);
                if  (clp->ctg != ctg)
				{
					fprintf(f,"%s.%c ctg%d\n",clp->clone,phit->rf,clp->ctg);fflush(0);
					sprintf(tmprem,"MISPLACED BES %s.%c SEQ %s",clp->clone,phit->rf,pseq->name);
					addfpremark(tmprem,phit->clone_idx);
					errors++;
				}
				else if (Pz.fromendInt < interval_gap(clp->x,clp->y,ploc->ctg_start,ploc->ctg_end))
				{
					fprintf(f,"%s.%c\n",clp->clone,phit->rf);fflush(0);
					sprintf(tmprem,"MISPLACED BES %s.%c SEQ %s",clp->clone,phit->rf,pseq->name);
					addfpremark(tmprem,phit->clone_idx);
					errors++;
				}
            }
        
            for (pctg = pseq->ctgs; pctg; pctg = pctg->next)
            {
                pseq2 = pctg->seq;
                for (phit = pseq2->hits; phit; phit = phit->next)
                {
                    if (phit->type != HT_BES) continue;  
				    if (!intervals_overlap(phit->seq_start,phit->seq_end,ploc->seq_start,ploc->seq_end))
				    {
					    continue;
				    }              
                    clp = arrp(acedata,phit->clone_idx,CLONE);
                    if  (clp->ctg != ctg)
				    {
					    fprintf(f,"%s.%c ctg%d\n",clp->clone,phit->rf,clp->ctg);fflush(0);
					    sprintf(tmprem,"MISPLACED BES %s.%c SEQ %s",clp->clone,phit->rf,pseq->name);
					    addfpremark(tmprem,phit->clone_idx);
					    errors++;
				    }
				    else if (Pz.fromendInt < interval_gap(clp->x,clp->y,ploc->ctg_start,ploc->ctg_end))
				    {
					    fprintf(f,"%s.%c\n",clp->clone,phit->rf);fflush(0);
					    sprintf(tmprem,"MISPLACED BES %s.%c SEQ %s",clp->clone,phit->rf,pseq->name);
					    addfpremark(tmprem,phit->clone_idx);
					    errors++;
				    }

                }
            }
		}
    }
    fpc_stack_destroy(&tempstack);
    fclose(f);
    printf("%d misplaced BES\n",errors);fflush(0);
}
void unaligned_draft_search()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;
    FPC_STACK tempstack;
    FPC_TREE* pnode;
    FPC_SEQ* pseq;
    struct seqctgpos* ploc;
    char* kbs;
    int len_kb;
    int kb;
    int start_kb, end_kb;
    SEQHIT* phit;
    CLONE* clp;
    char tmpname[100];
    int seq_pos_kb;
    int total_kb = 0;
    struct list *newentry, *lastentry;
    int unaligned;

    scan_fields();

    strcpy(newdirname,".");
    strcpy(newfilename,"unaligned_draft.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);

    // go through each draft contig and show the segments with no alignment, and their 
    // incorporated BES

    clear_listroot();
	clear_fprem_pattern("unaligned:");

    listroot = 0; 
    lastentry = 0;
    classctg = SEQCLASS;

    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        // make an array with one entry for each kb segment
        // we will set the array value to 1 if the seqment is covered

        len_kb = pseq->length/1000;
        kbs = (char*)malloc(len_kb*sizeof(char));
        memset(kbs,0,len_kb*sizeof(char));

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            for (kb = ploc->seq_start/1000; kb <= ploc->seq_end/1000 && kb < len_kb; kb++)
            {
                kbs[kb] = 1;
            }   
        }

        // go through the coverage array and pull out uncovered segments

        unaligned = 0;

        start_kb = -1;
        for (kb = 0; kb < len_kb; kb++)
        {
            if (kbs[kb] == 0 )
            {
                if (start_kb == -1) 
                {
                    start_kb = kb; 
                }
                else if (kb + 1 >= len_kb || kbs[kb+1] != 0)
                {
                    // end of the segment. 
                    end_kb = kb;
                    if (end_kb - start_kb > unaligned_minseqlen)
                    {
                        fprintf(f,"\n>%s %d kb-%d kb (%d kb total)\n",pseq->name,start_kb,end_kb, end_kb-start_kb);fflush(0);

                        unaligned = 1;

                        // print all bes hits in this region 

                        for (phit = pseq->hits; phit; phit = phit->next)
                        {
                            if (phit->type != HT_BES) continue;
                            seq_pos_kb = (phit->seq_start + phit->seq_end)/2000; 
                            if (start_kb <= seq_pos_kb && seq_pos_kb <= end_kb)
                            {                                               
                                clp = arrp(acedata,phit->clone_idx,CLONE);
                                sprintf(tmpname,"%s.%c",clp->clone,phit->rf);
                                fprintf(f,"%s Ctg%d %d bands\n",tmpname,clp->ctg,clp->fp[0].b2);fflush(0);
                                sprintf(tmpname,"unaligned:%s",pseq->name);
                                addfpremark(tmpname,phit->clone_idx);
                            }
                        }
                        total_kb += (end_kb - start_kb);
                    }
                }  

            }
            else
            {
                start_kb = -1;
            } 

        }     

        if (unaligned)
        {
            newentry = (struct list*)messalloc(sizeof(struct list));
            memset(newentry,0,sizeof(struct list));
            newentry->seq = pseq;
            if (lastentry)
            {
                lastentry->next = newentry;
            }
            else
            {
                listroot = newentry;
            }
            lastentry = newentry;
        }
        
        free(kbs);
        kbs=0;
    }

    displaykeyset(1);

    fprintf(f,"\n%d kb total unaligned sequence\n",total_kb);fflush(0);  
    fclose(f);
    printf("\n");fflush(0);
}
void unaligned_ctg_search()
{
    char newfilename[MAXPATHLEN];
    char newdirname[MAXPATHLEN];
    char end[MAXPATHLEN];
    char outfile[MAXPATHLEN]; 
    FILE* f;
    int *contigs_bysize;
    int *aligned_ctgs;
    int ctg, i;
    FPC_STACK tempstack;
    FPC_TREE* pnode, temptree2;
    FPC_SEQ* pseq, *pseq2;
    struct seqctgpos* ploc;
    struct list *newentry, *lastentry;
    int nun=0, ncand=0;
    int cidx;
    SEQHIT* phit;
    CLONE* clp;
    char tmpname[100];

    // Unfortunately, we have to build an auxiliary tree of all the BES
    // telling us which sequence they are part of. This is necessary 
    // because the BES are loaded only has hits rather than sequences
    // in their own right. 

    scan_fields();

    init_fpc_tree(&temptree2);
    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (phit = pseq->hits; phit; phit = phit->next)
        {
            if (phit->type != HT_BES) continue;                
            clp = arrp(acedata,phit->clone_idx,CLONE);
            sprintf(tmpname,"%s.%c",clp->clone,phit->rf);
            fpc_tree_insert_node(&temptree2,(void*)phit,tmpname);        
        }
    }
    fpc_stack_destroy(&tempstack);

    // first find the contigs and sort by size

    strcpy(newdirname,".");
    strcpy(newfilename,"unaligned_ctgs.txt");
    end[0] = 0;

    f = (FILE*)graphQueryOpen(newdirname,newfilename,end,"w","Enter output file name");
    if (f == NULL) return;

    sprintf(outfile,"%s/%s.%s",newdirname,newfilename,end);
    printf("Writing file %s...",outfile);fflush(0);


    contigs_bysize = (int*)malloc(sizeof(int)*(max_contig+1));
    memset(contigs_bysize,0,sizeof(int)*(max_contig+1));

    aligned_ctgs = (int*)malloc(sizeof(int)*(max_contig+1));
    memset(aligned_ctgs,0,sizeof(int)*(max_contig+1));
    
    for (ctg=0; ctg <= max_contig; ctg++)
    {
        contigs_bysize[ctg] = ctg;
    } 
    qsort(contigs_bysize,max_contig+1,sizeof(int),ctgs_bysize_sort);

    fpc_stack_init(&tempstack); 
    pnode = &seq_tree;
    while ((pnode = fpc_tree_next_node(pnode, &tempstack))) 
    {     
        if (!pnode->pdata) continue;
        pseq = (FPC_SEQ*)pnode->pdata;
        if (pseq->parent) continue;

        for (ploc = pseq->ctgpos; ploc; ploc = ploc->next)
        {
            aligned_ctgs[ploc->ctg] += (ploc->ctg_end - ploc->ctg_start);
        }
    }

    clear_listroot();

    listroot = 0; 
    lastentry = 0;
    classctg = CTGCLASS;

    for (i = 0; i <= max_contig; i++)
    {
        ctg = contigs_bysize[i];
        if (ctg == 0) continue;
        if (contigs[ctg].count < unaligned_minclones) continue;
        if (contigs[ctg].ctgstat == DEAD || contigs[ctg].ctgstat == AVOID) continue;

        ncand++;
        if (aligned_ctgs[ctg] > 0) continue; //(contigs[ctg].right - contigs[ctg].left)/2) continue;
        nun++;

        fprintf(f,">Ctg%d %d clones\n",ctg,contigs[ctg].count);fflush(0);
        for (cidx = contigs[ctg].start; cidx > 0; cidx = clp->next)
        {
            clp = arrp(acedata,cidx,CLONE);

            sprintf(tmpname,"%s.r",clp->clone);
            if ( (phit = fpc_tree_find_data(&temptree2,tmpname)) )
            {
                pseq2 = phit->seq;
                fprintf(f,"%s\t%s\n",tmpname,pseq2->name);fflush(0);
            }

            sprintf(tmpname,"%s.f",clp->clone);
            if ( (phit = fpc_tree_find_data(&temptree2,tmpname)) )
            {
                pseq2 = phit->seq;
                fprintf(f,"%s\t%s\n",tmpname,pseq2->name);fflush(0);
            }
        }

        newentry = (struct list*)messalloc(sizeof(struct list));
        memset(newentry,0,sizeof(struct list));
        newentry->index = ctg;
        if (lastentry)
        {
            lastentry->next = newentry;
        }
        else
        {
            listroot = newentry;
        }
        lastentry = newentry;

    }

    displaykeyset(1);

    fprintf(f,"\n%d unaligned contigs with at least %d clones (of %d candidates)\n",nun,unaligned_minclones,ncand);fflush(0);
    printf("\n%d unaligned contigs with at least %d clones (of %d candidates)\n",nun,unaligned_minclones,ncand);fflush(0);
    free(contigs_bysize);
    free(aligned_ctgs);
    fpc_stack_destroy(&tempstack);
    fclose(f);
    clear_fpc_tree(&temptree2,0);
        
}

void seq_set_min_clones(char* num)
{
    sscanf(num,"%d",&SeqProps.min_clones);
}
void seq_set_min_ctgs(char* num)
{
    sscanf(num,"%d",&SeqProps.min_ctgs);
}
void seq_set_window_size(char* num)
{
    sscanf(num,"%d",&SeqProps.window_size);
}
void seq_set_top_n(char* num)
{
    sscanf(num,"%d",&SeqProps.top_n);
}

static void pickseq(int box,double x,double y)
{
    if (box == 0) return;

    scan_seq_props();

    if (box == uniqueArc) 
    {
        SeqProps.use_unique = !SeqProps.use_unique;
        sequence_window();
    } 
    else if (box == placedArc) 
    {
        SeqProps.search_placed = !SeqProps.search_placed;
        if (SeqProps.search_placed == 0)
        {
            SeqProps.search_ends = 0;
            SeqProps.search_multiplaced = 0;
        }
        sequence_window();
    } 
    else if (box == multiplacedArc) 
    {
        if (SeqProps.search_placed == 1)
        {
            SeqProps.search_multiplaced = !SeqProps.search_multiplaced;
            sequence_window();
        }
    } 
    else if (box == endsArc) 
    {
        if (SeqProps.search_placed == 1)
        {
            SeqProps.search_ends = !SeqProps.search_ends;
            sequence_window();
        }
    } 
    else if (box == endmatchArc) 
    {
        SeqProps.endmatch_check = !SeqProps.endmatch_check;
        sequence_window();
    } 
}

void help_load()
{
    show_help("Sequence Loading","seq_load");
}
void help_place()
{
    show_help("Sequence Placement","seq_place");
}
void help_search()
{
    show_help("Sequence Search","seq_search");
}
void help_other()
{
    show_help("Sequence Misc Functions","seq_other");
}
void help_mis()
{
    show_help("Sequence Placement","seq_analyze");
}
void exit_seq()
{
    if(graphActivate(gSEQ))
    {
        graphDestroy();
    }
}
static MENUOPT popupMenu[]={
    {exit_seq,"Close"},
    { graphPrint, "Print Screen"}, 
    { 0, 0 }} ;


void sequence_window()
{
    int row = 0;

    if(graphActivate(gSEQ))
    {
        graphPop();
    }
    else
    {
        gSEQ = graphCreate (TEXT_FIT,"Draft Sequence Integration (DSI)",.2,.2,.45,.58);
        graphRegister (PICK, pickseq) ;
    }

    graphClear();

    row += 1.0;
    graphText("Align Sequences to Contigs",2.0,row);

    row += 2.0;
    graphText("Load:",2.0,row);
    graphButton("Seq Info",load_info_file_cb,10,row);
    graphButton("BES",load_besmap_file_cb,20,row);
    graphText("or",25.0,row);
    graphButton("BSS",load_draft_bss_file_cb,28,row);
    graphButton("Help",help_load,40,row);

/*    row += 2.0;
    endmatchArc = graphBoxStart();
    graphArc(22, row+0.5, 0.7, 0, 360);
    if (SeqProps.endmatch_check) graphFillArc(22, row+0.5, 0.4, 0, 360);
    graphBoxEnd();
    graphBoxDraw(endmatchArc, BLACK, TRANSPARENT);
    graphText("Check end matches",24,row);
*/

    row += 2.0;

    graphText("Window size (kb):",2.0,row);
    sprintf(window_size_sz,"%d",SeqProps.window_size);
    graphTextEntry(window_size_sz,8,25,row,seq_set_window_size);

    row += 2.0;
    graphText("Min BES hit:",2.0,row);
    sprintf(min_clones_sz,"%d",SeqProps.min_clones);
    graphTextEntry(min_clones_sz,8,25,row,seq_set_min_clones);

/*    row += 2.0;
    graphText("Min seq ctgs:",2.0,row);
    sprintf(min_ctgs_sz,"%d",SeqProps.min_ctgs);
    graphTextEntry(min_ctgs_sz,8,25,row,seq_set_min_ctgs);
*/
    row += 2.0;
    graphText("Top N (0 for all):",2.0,row);
    sprintf(top_n_sz,"%d",SeqProps.top_n);
    graphTextEntry(top_n_sz,8,25,row,seq_set_top_n);

/*
    row += 2.0;
    uniqueArc = graphBoxStart();
    graphArc(25, row+0.5, 0.7, 0, 360);
    if (SeqProps.use_unique) graphFillArc(25, row+0.5, 0.4, 0, 360);
    graphBoxEnd();
    graphBoxDraw(uniqueArc, BLACK, TRANSPARENT);
    graphText("Use all unique hits",2,row);
*/
    row += 2.0;
    graphButton("Place Sequences",redo_sequence_placements_btn,2,row);
    graphButton("Help",help_place,40,row);

    row+=2.0;
    graphLine(1,row,50,row);

    row += 1.0;
    graphText("Find Sequences",2.0,row);
  
    row += 2.0;
    graphText("Name:",2.0,row);
    strncpy(search_name_sz,SeqProps.search_name,30);
    graphTextEntry(search_name_sz,15,10,row,seq_search);

    row += 2.0;
    placedArc = graphBoxStart();
    graphArc(10, row+0.5, 0.7, 0, 360);
    if (SeqProps.search_placed) graphFillArc(10, row+0.5, 0.4, 0, 360);
    graphBoxEnd();
    graphBoxDraw(placedArc, BLACK, TRANSPARENT);
    graphText("Placed",2,row);

    multiplacedArc = graphBoxStart();
    graphArc(33, row+0.5, 0.7, 0, 360);
    if (SeqProps.search_multiplaced) graphFillArc(33, row+0.5, 0.4, 0, 360);
    graphBoxEnd();
    graphBoxDraw(multiplacedArc, BLACK, TRANSPARENT);
    graphText("Multiple placements",13,row);
/*
    endsArc = graphBoxStart();
    graphArc(46, row+0.5, 0.7, 0, 360);
    if (SeqProps.search_ends) graphFillArc(46, row+0.5, 0.4, 0, 360);
    graphBoxEnd();
    graphBoxDraw(endsArc, BLACK, TRANSPARENT);
    graphText("Ends only",35,row);
*/

    row += 2.0;
    graphButton("Search",seq_search,2,row);
    graphButton("Clear Keyset",clear_listroot,12,row);
    graphButton("Help",help_search,40,row);

/*    row += 2.0;
    graphButton("Contigs with multiple SeqCtgs",ctg_multi_search,2,row);
    row += 2.0;
    graphButton("Contigs partially covered by a SeqCtg",ctg_partial_search,2,row);
*/

    row+=2.0;
    graphLine(1,row,50,row);

    row += 1.0;
    graphText("Error Detection & Analysis",2.0,row);
    graphButton("Help",help_mis,40,row);

    row += 2.0;
    graphText("Ctg_FromEnd:",2.0,row);
    sprintf(ctg_fromend_sz,"%d",Pz.fromendInt);
    graphTextEntry(ctg_fromend_sz,4,15,row,scan_fields);

/*    graphText("MinSpan:",24.0,row);
    sprintf(ctg_minspan_sz,"%d",150);
    graphTextEntry(ctg_minspan_sz,4,33,row,scan_fields);

    row += 2.0;
*/
    graphText("Seq_FromEnd:",21.0,row);
    sprintf(seq_fromend_sz,"%d",Pz.seq_fromendInt);
    graphTextEntry(seq_fromend_sz,4,34,row,scan_fields);
    graphText("kb",39,row);
/*    graphText("MinSpan:",24.0,row);
    sprintf(seq_minspan_sz,"%d",150);
    graphTextEntry(seq_minspan_sz,4,33,row,scan_fields);
    graphText("kb",37.5,row);
*/
    row += 2.0;
    graphButton("Seq Joins",seq_joins,2,row);
    graphButton("Ctg Joins",ctg_joins,13,row);
    graphButton("MisAssemblies",seq_mis,24,row);

    row += 3.0;

    graphButton("Unaligned FPC Contigs",unaligned_ctg_search,2,row);
    graphText("Min Clones:",28,row);
    sprintf(unaligned_minclones_sz,"%d",unaligned_minclones);
    graphTextEntry(unaligned_minclones_sz,4,40,row,scan_fields);

    row+=2.0;

    graphButton("Unaligned Draft Contigs",unaligned_draft_search,2,row);
    graphText("Min Length (kb):",28,row);
    sprintf(unaligned_minseqlen_sz,"%d",unaligned_minseqlen);
    graphTextEntry(unaligned_minseqlen_sz,5,45,row,scan_fields);

//    row+=2.0;

//    graphButton("Unincorporated BES",missing_bes_search,2,row);

    row+=2.0;

    graphButton("Misplaced BES",misplaced_bes_search,2,row);

    row+=2.0;
    graphLine(1,row,50,row);

    row += 1.0;
    graphButton("Print to file",seq_stats,2,row);
    graphButton("Remove Sequences",destroy_seq_tree,17,row);
    graphButton("Help",help_other,40,row);
    //graphButton("Print tree",dump_seq_tree,22,row);

    graphMenu(popupMenu);

    graphRedraw();
}
